From: <leo...@us...> - 2010-01-05 16:39:34
|
Revision: 2205 http://aperture.svn.sourceforge.net/aperture/?rev=2205&view=rev Author: leo_sauermann Date: 2010-01-05 16:39:25 +0000 (Tue, 05 Jan 2010) Log Message: ----------- fixed aperture-webserver to work with latest aperture 1.4.0 release. works. Modified Paths: -------------- aperture-webserver/trunk/README.txt aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java Modified: aperture-webserver/trunk/README.txt =================================================================== --- aperture-webserver/trunk/README.txt 2010-01-05 16:35:33 UTC (rev 2204) +++ aperture-webserver/trunk/README.txt 2010-01-05 16:39:25 UTC (rev 2205) @@ -1,13 +1,17 @@ Aperture Webserver ================== -Date: 3.8.2007 -Authors: Benjamin Horak, Leo Sauermann -Copyright: DFKI GmbH +Date: 5.1.2010 +Authors: Leo Sauermann, Benjamin Horak +Copyright: DFKI GmbH and NEPOMUK consortium for parts +License: BSD -Features: +Aperture Webserver is a web application that provides the features of Aperture as a service. + +Features +======== - extract plaintext and RDF from files -Installation: +== Installation == - put the WAR into a Tomcat >= 5.5 - restart Tomcat - go to the website http://yourtomcathost/aperture-webserver/ @@ -16,15 +20,13 @@ ================ - Datasource configuration - Crawler -- web search engine? sparql endpoint? -(but this leads to a full Aduna Metadata Server, -which we don't need to replicate) Development =========== - checkout aperture-webserver using Eclipse >= 3.2, you need the J2EE Web Developer Tools (WST) -- get the latest aperture release -- copy all aperture JARs into the WebContent/WEB-INF/lib folder +- get the latest aperture release: aperture 1.4.0 +- copy all aperture JARs into the WebContent/WEB-INF/lib folder (use the aperture-runtime-onejar-1.4.0 version) + the needed files are listed in the svn:ignore property of the lib folder. 
Building Releases ================= Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java 2010-01-05 16:35:33 UTC (rev 2204) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java 2010-01-05 16:39:25 UTC (rev 2205) @@ -41,7 +41,7 @@ import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerFactoryImpl; import org.semanticdesktop.aperture.util.IOUtil; -import org.semanticdesktop.aperture.vocabulary.DATA; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * Servlet implementation class for Servlet: FileInspector @@ -276,9 +276,9 @@ extractor.extract(container.getDescribedUri(), buffer, null, mimeType, container); } // add the MIME type as an additional statement to the RDF model - container.add(DATA.mimeType, mimeType); + container.add(NIE.mimeType, mimeType); for (String link : hyperlinks) { - container.add(DATA.keyword, link); + container.add(NIE.keyword, link); } // report the output to System.out container.getModel().writeTo(stringWriter, Syntax.RdfXml); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-01-19 11:34:34
|
Revision: 2212 http://aperture.svn.sourceforge.net/aperture/?rev=2212&view=rev Author: leo_sauermann Date: 2010-01-19 11:34:07 +0000 (Tue, 19 Jan 2010) Log Message: ----------- added the source of the nepomuk aperture server. Modified Paths: -------------- aperture-webserver/trunk/WebContent/WEB-INF/web.xml Added Paths: ----------- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateEvent.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateListener.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/CrawlerStateBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangeListener.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangedEvent.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ResourceBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/STATE.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ServerMXBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/StateChangeListener.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/StateChangedEvent.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourceInformation.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/ServerAccessData.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/ 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/AccessingResourceFailedException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/DataSourceAlreadyPresentException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/DataSourceNotFoundException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/DataSourceNotLockedException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/RefreshingResourceFailedException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/ResourceOpeningFailedException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureRegistriesAggregate.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/thread/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/thread/CrawlingThread.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/thread/CrawlingThreadPool.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/thread/MonitoringThread.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java Modified: aperture-webserver/trunk/WebContent/WEB-INF/web.xml =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-01-19 11:31:13 UTC (rev 2211) +++ aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-01-19 11:34:07 UTC (rev 2212) @@ -12,10 +12,24 @@ <servlet-class> org.semanticdesktop.aperture.servlet.FileInspector</servlet-class> </servlet> + 
<servlet> + <description> + starts and stops the aperture server</description> + <display-name> + ApertureServerServlet</display-name> + <servlet-name>ApertureServerServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.servlet.ApertureServerServlet</servlet-class> + <load-on-startup>1</load-on-startup> + </servlet> <servlet-mapping> <servlet-name>FileInspector</servlet-name> <url-pattern>/FileInspector</url-pattern> </servlet-mapping> + <servlet-mapping> + <servlet-name>ApertureServerServlet</servlet-name> + <url-pattern>/ApertureServer</url-pattern> + </servlet-mapping> <welcome-file-list> <welcome-file>index.jsp</welcome-file> </welcome-file-list> Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,552 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +import java.util.List; + +import org.semanticdesktop.aperture.datasource.DataSourceFactory; +import org.semanticdesktop.aperture.detector.DataSourceDescription; +import org.semanticdesktop.aperture.vocabulary.NIE; + +/** + * The main interface of the Aperture Server for the Aperture Framework. + * It serves as a facade to the whole functionality provided by Aperture. + * <h2>Threads</h2> + * <p>The methods {@link #crawlAllDatasources()} {@link #crawlDatasource(String)}, + * and {@link #start()} are non-blocking. They will return immediately. + * Once the server runs, its status can be queried using + * {@link #getCrawlerState(String)} and {@link #getOverallState()}. + * <p> + * ApertureDataWraper registers listeners of DataSourcesChange, which occurs when DataSources are removed, updated or added. + * <p> + * The monitoring can also be triggered manually. + * </p> + * + * <p> + * Many ideas for this class have been inspired (or sometimes impudently + * copy-pasted) from the Gnowsis Project <a href="http://www.gnowsis.org"> + * www.gnowsis.org</a>. 
More specifically from + * + * <a href="https://gnowsis.opendfki.de/repos/gnowsis/trunk/gnowsis-server/src/java/org/gnowsis/data/ApertureDataSourceRegistry.java"> + * here</a> and + * <a href="https://gnowsis.opendfki.de/repos/gnowsis/trunk/gnowsis-server/src/java/org/gnowsis/data/ApertureSynchroniser.java"> + * here</a> + * + * <h2><a name="rdfmime">RDF Mime/Type</a></h2> + * <p> + * The format of the serialisation, possible values are + * <ul> + * <li>application/rdf+xml - rdf/xml</li> + * <li>application/x-turtle - Turtle</li> + * <li>text/rdf+n3 - N3</li> + * <li>application/trix - TRIX</li> + * </ul> + * </p> + * + * @see <a href="http://aperture.sourceforge.net"> + * http://aperture.sourceforge.net + * </a> + * + * @author <a href="mailto:ant...@df...">Antoni Mylka</a> + * + */ + +public interface ApertureServer { + + /** + * The constant used for the name of a data source that doesn't have + * a descriptive name specified in it's rdf configuration. + */ + public static final String UNNAMED_SOURCE = "No name data source"; + + /** + * The time the server will wait for a monitoring thread to finish. + */ + public static final long THREAD_JOIN_TIMEOUT = 10000; + + /** + * Default timeout between two crawls of the same data source. This value + * is used if the configuration of a datasource doesn't provide it's own. + */ + public static final long DEFAULT_MONITORING_TIMEOUT = 60000; + + /** + * The default number of threads used by the monitoring subsytem. + */ + public static final int DEFAULT_MONITORING_THREAD_POOL_SIZE = 2; + + //////////////////////////////////////////////////////////////////////// + ////////////////////////////// DESTRUCTION ///////////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Closes the server and releases any resources it might + * hold. 
+ */ + public void close(); + + //////////////////////////////////////////////////////////////////////// + ///////////////////////// MONITORING METHODS /////////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Starts monitoring the predefined data sources. This method launches a + * thread that runs the crawlers, waits for a specified time interval and + * reruns the crawlers, replacing the information in the repository about + * objects that have changed, with new version. + */ + void start(); + + /** + * Stops the monitoring process. Any crawlers that are currently running are + * stopped and the waiting thread is terminated. No further crawling will + * take place unless startMonitoring() is invoked. + */ + void stop(); + + /** + * Stops current crawlers and restarts the crawling process. This method + * can be used to force the server to take new configuration into + * account. + */ + void restart(); + + /** + * Checks if the monitoring thread is currently running. A positive answer + * indicates that either some sources are being crawled at the moment, or + * the server is idle between scheduled crawls. + * + * @return true if data sources are currently being monitored<br> + * false otherwise + */ + boolean isRunning(); + + + //////////////////////////////////////////////////////////////////////// + //////////////////// MANUAL TRIGGERING OF CRAWLERS ///////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Visit all datasources NOW. + * + * This will launch a separate thread for each datasource and will + * return immediately. Use getState, getProgress and getCrawlReport to check + * how it's doing. + * + * @throws ApertureServerException + * The server is in monitoring mode. + */ + public void crawlAllDatasources() throws ApertureServerException; + + /** + * Crawl the datasource identified by the URI. 
If the datasource is already + * being crawled - nothing happens. + * This method is non-blocking and will return immediately, starting + * the crawl in an extra thread. + * @param datasourceURI The identifier of the data source to be crawled. + * @return true if a crawl has begun or false if a crawl was already + * underway. + * @throws ApertureServerException if the data source under the given + * URI hasn't been found or the crawling could not be started for + * some other reason. + */ + public boolean crawlDatasource(String datasourceURI) + throws ApertureServerException; + + /** + * Stop crawling the given data source. + * + * @param datasourceURI The identifier of the data source to be crawled. + * @return true if the source was actually being crawled and the crawling + * had stopped. False if the source wasn't being crawled and there + * was nothing to stop. + * @throws ApertureServerException if the data source under the given + * URI hasn't been found or some other error occured. + */ + public boolean stopCrawlingDatasource(String datasourceURI) + throws ApertureServerException; + + + //////////////////////////////////////////////////////////////////////// + ///////////////////// DATA SOURCE LIST MANIPULATION //////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Add a new data source. + * + * The source to be added is identified by the passed uri. The configuration + * is read from a given configuration string. The string is interpreted + * using the syntax specified in the provided mime type. The string expected + * to behave well. It should contain one rdf description of the datasource + * URI. It is likely that the configuration repository will become corrupt + * if something else is passed. + * + * @param datasourceURI + * The identifier of the source to be added. 
It is strongly recommended + * to use only the uris returned by {@link #generateDataSourceUri()} + * @param configurationRDF + * The configuration data about the data source. It has to + * contain the datasourceURI as resource. The source is configured + * using the vocabulary from the DATA ontology and possibly additional + * source-specific properties. + * @param mimeType + * The mime type of the serialization. See the documentation for + * RDFRepository interface for possible values. + * @throws ApertureServerException if a data source with the given + * URI is already present, the URI is faulty or an IO error occurs + */ + public void addDataSourceConfig(String datasourceURI, + String configurationRDF, String mimeType) + throws ApertureServerException; + + /** + * Update a registered data source. The source to be updated is identified + * by the passed uri identifier. New configuration is stored within the + * given rdf. This will also reload the datasource/adapter. + * + * @param datasourceURI the identifier of the data source to be updated + * @param newRDFConfiguration new configuration of the data source expressed + * in an RDF using the vocabulary from the DATA ontology. + * @param mimeType the mime type of the serialization. See the documentation + * for {@link RDFRepository} interface for possible values. + * @throws ApertureServerException if something goes wrong. Reasons + * might include the data source URI or the rdf configuration + * containing syntactical errors, or an unsupported mime type. + */ + public void updateDataSourceConfig(String datasourceURI, + String newRDFConfiguration, String mimeType) + throws ApertureServerException; + + /** + * Remove a datasource from the registry. Attention: the registry does not + * call release() on the datasource, instead it returns it to you to do + * this. 
+ * + * @param datasourceURI The URI identifying the datasource + * @param deleteData A flag indicating if the data associated with this data + * source is to be deleted from the repository. + * @throws ApertureServerException if the uri is syntactically faulty + * or a datasource under the given uri doesn't exist in the system. + */ + public void removeDataSourceConfig(String datasourceURI, boolean deleteData) + throws ApertureServerException; + + /** + * Generates an URI that can be used to create a new data source. + * @return a data source URI + */ + public String generateDataSourceUri(); + + /** + * Detect possible data sources. + * The returned data sources can be already in the system, + * at the moment this interface does not provide means to check if + * an already added datasource is detected again. + * This method may take some time (1-10 seconds) + * @return a list of detected datasourcebeans. can be empty, but never null. + */ + public List<DataSourceDescription> detectPossibleDataSources(); + + /** + * Detect possible data sources. + * The returned data sources can be already in the system, + * at the moment this interface does not provide means to check if + * an already added datasource is detected again. + * This method may take some time (1-10 seconds) + * @param datasourcetype the URI identifying the datasourcetype. + * The existing datasourcetypes are listed in {@link #getDataSourceOntologyAndDescriptions(String)} + * and are subclasses of {@link NIE#DataSource}. + * @return a list of detected datasourcebeans. can be empty, but never null. + */ + public List<DataSourceDescription> detectPossibleDataSources(String datasourcetype); + + /** + * List all possible DataSource factories + * @return a list of factories. 
+ */ + public List<DataSourceFactory> listDataSourceFactories(); + + //////////////////////////////////////////////////////////////////////// + ///////////////////// DATA SOURCE LIST BROWSING //////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * List the registered URIs of datasources. + * + * @return The strings identifying the data sources. + */ + public List<String> listDataSourceUris(); + + /** + * List only DataSources of a given type. + * + * @param typeURI + * The uri of the data source type. + * @return A list containing all implementing datasources. + */ + public List<String> listDataSourcesImplementing(String typeURI); + + /** + * Get the configuration of the passed data source URI. + * + * The configuration will contain the label of the data source, the + * classname of the adapter, etc. + * + * @param datasourceURI + * The uri of the data source + * @param mimeType + * The mime type of the serialization. See the documentation for + * RDFRepository interface for possible values + * @return a resource describing the data source, it has the passed URI + * @throws ApertureServerException A problem has occured. + */ + public String getDataSourceConfig(String datasourceURI, String mimeType) + throws ApertureServerException; + + /** + * Returns the number of known datasources. + * + * @return The number of known datasources. + */ + public long getDataSourceCount(); + + + /** + * Returns an RDF graph containing the datasource ontology (the vocabulary + * used to configure data sources), the sourceformat ontology (vocabulary + * used to specify widgets for the UI) and descriptions of all known data + * source types. These descriptions use the Fresnel Display Vocabulary. Each + * one comprises a fresnel:Lens instance, with a list of all configuration + * properties needed by a crawler for this data source. 
Each configuration + * property has a fresnel:Format that specifies the widget that is to be + * used in the user interface, the widgets are chosen from those specified + * in the Aperture sourceformat ontology. + * + * @param mimeType the constant specifying the syntax for the output. + * @return The string containing the RDF graph. + * @throws ApertureServerException if something goes wrong in the process. + */ + public String getDataSourceOntologyAndDescriptions( + String mimeType) throws ApertureServerException; + + + //////////////////////////////////////////////////////////////////////// + ///////////////////// SINGLE RESOURCE OPERATIONS /////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Accesses the resource. Returns all information the server is able + * to provide about a resource. The information is not stored in the RDF + * repository.<br> + * <br> + * Note that this method SHOULD work for most resources, though it is + * limited by the capabilities of the underlying Aperture framework. + * A DataAccessorFactory implementation must exist for the given URI scheme. + * + * @param resourceUrl the URL of the resource. It should be well-formed, + * that is it should contain the scheme (file:// http:// etc.) and + * all illegal characters (such as spaces) should be converted + * to appropriate escape sequences (such as %20). If no 'authority' + * part is given (such as file://localhost/D:/path/to/file.txt) then + * there should be THREE slashes after the scheme. The server will + * try to do its best if this string doesn't meet these requirements + * but no guarantees are made. + * @param formatMimetype the <a href="#rdfmime">rdf mimetype</a> serialization format + * for the returned string. 
+ * @return RDF string representing the information about the given resource + * expressed with a given syntax + * @throws ApertureServerException if something goes wrong + */ + public String accessResource( + String resourceUrl, + String formatMimetype) throws ApertureServerException; + + /** + * Ensures that the information about a resource stored in the repository is + * up to date. If the resource is already known to the server, the + * information is refreshed, if not, then the resource is examined and the + * information is placed in the repository. This method is similar to the + * {@link #accessResource(String, String, boolean)} method, but it doesn't + * return anything, and the information is always stored in the repository + * <br> + * <br> + * Note that this method SHOULD work for most resources, though it is + * limited by the capabilities of the underlying Aperture framework. An URI + * must meet two requirements for this method to succeed: + * <ol> + * <li>A DataAccessorFactory implementation must exist for the given URI scheme</li> + * <li>It must be possible to match the URI to one of the data sources. If + * it's not possible, or the resource doesn't belong to any of the + * data sources defined for this server, then the user could + * use the {@link #accessResource()} method to obtain the RDF and store + * it in the repository on his/her own responsibility.</li> + * </ol> + * + * @param resourceUri the uri of the resource. It should be well-formed, + * that is it should contain the scheme (file:// http:// etc.) and + * all illegal characters (such as spaces) should be converted + * to appropriate escape sequences (such as %20). If no 'authority' + * part is given (such as file://localhost/D:/path/to/file.txt) then + * there should be THREE slashes after the scheme. The server will + * try to do its best if this string doesn't meet these requirements + * but no guarantees are made. 
+ * @throws ApertureServerException if the server was unable to + * find an accessor for this scheme, or to match this URI with a + * DataSource, or if something else goes wrong + */ + public void refreshResourceInStore(String resourceUri) + throws ApertureServerException; + + + /** + * Stores information about the resource in the store. + * <p>If the resource is already known to the server, the + * information is refreshed, if not, then the resource is examined and the + * information is placed in the repository. This method is similar to + * {@link #accessResource(String, String, boolean)} method, but it doesn't + * return anything. + * Compared to {@link #refreshResourceInStore(String)}, this method will + * also store resources that can not be mapped to a configured data source. + * </p> + * <p>This method will store the extracted information in the resource-store, + * but the server provides no mechanism to later remove the data again. + * Only if the resource is part of a datasource, it will be kept up-to-date + * and removed if the underlying data object is removed.</p> + * Note that this method SHOULD work for most resources, though it is + * limited by the capabilities of the underlying Aperture framework. An URI + * must meet one requirement for this method to succeed: + * <ol> + * <li>A DataAccessorFactory implementation must exist for the given URI scheme</li> + * </ol> + * + * @param resourceUri the uri of the resource. It should be well-formed, + * that is it should contain the scheme (file:// http:// etc.) and + * all illegal characters (such as spaces) should be converted + * to appropriate escape sequences (such as %20). If no 'authority' + * part is given (such as file://localhost/D:/path/to/file.txt) then + * there should be THREE slashes after the scheme. The server will + * try to do its best if this string doesn't meet these requirements + * but no guarantees are made. 
+ * @throws ApertureServerException if the server was unable to + * find an accessor for this scheme, or to match this URI with a + * DataSource, or if something else goes wrong + */ + public void storeResourceInStore(String resourceUri) + throws ApertureServerException; + + //////////////////////////////////////////////////////////////////////// + ////////////// INFORMATION ABOUT THE MONITORING PROCESS //////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * @param dataSourceUri the URI of the data source to be checked. + * @return The current state of the crawler for the given data source. + * @throws ApertureServerException if the source under the given URI + * has not been found, or something else goes wrong + */ + public STATE getCrawlerState(String dataSourceUri) + throws ApertureServerException; + + /** + * Returns the overall state of crawling For a more detailed view, use + * {@link #getDetailedCrawlerState(String)} + * + * @return The current state of the crawler. + */ + public STATE getOverallState(); + + /** + * This method returns a detailed report about the crawler state. The + * returned bean contains all volatile information about the crawler. The + * one that is not persisted in the configuration repository. + * + * @param dataSourceUri the uri of the data source. + * @return the detailed CrawlProgress bean for the given data source. + * @throws ApertureServerException if the source under the given URI + * has not been found or something else goes wrong + */ + public CrawlerStateBean getDetailedCrawlerState(String dataSourceUri) + throws ApertureServerException; + + /** + * Returns the URI that was last crawled by this aperture server. + * @return The URI of a data item that was last crawled by this aperture + * server. 
+ */ + public String getLatestCrawledURI(); + + + //////////////////////////////////////////////////////////////////////// + //////////////////////////// NOTIFICATIONS ///////////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Add a state change listener. The listener will be notified about the + * changes of crawlers' states. + * + * @param listener The listener to be notified. + */ + public void addStateChangeListener(StateChangeListener listener); + + /** + * Remove a previously registerd StateChangeListener. + * @param listener The listener to be removed. + */ + public void removeStateChangeListener(StateChangeListener listener); + + /** + * Add a data source change listener. The listener will be notified about changes in data source configuration + * + * @param listener The listener to be notified. + */ + public void addDataSourceChangeListener(DataSourceChangeListener listener); + + /** + * Remove a previously registered DataSourceChangeListener. + * @param listener The listener to be removed. + */ + public void removeDataSourceChangeListener(DataSourceChangeListener listener); + + /** + * Add a {@link ApertureServerStateListener}, which will be notified about {@link ApertureServer} state. + * @param listener + */ + public void addStateListener(ApertureServerStateListener listener) ; + + /** + * Remove already registered {@link ApertureServerStateListener}. 
+ * @param listener + */ + public void removeStateListener(ApertureServerStateListener listener) ; + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerException.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerException.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerException.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +/** + * An exception that signifies a failure of the initialization of an ApertureServer. + * @author Antoni Mylka (Ant...@df...) + */ +public class ApertureServerException extends Exception { + + /** the serial version UID */ + private static final long serialVersionUID = 1221452939745112132L; + + /** Default constructor */ + public ApertureServerException() { + super(); + } + + /** + * Constructor accepting a cause + * @param cause the cause. + */ + public ApertureServerException(Throwable cause) { + super(cause); + } + + /** + * Constructor accepting a message. + * @param msg the message. + */ + public ApertureServerException(String msg) { + super(msg); + } + + /** + * Constructor accepting a message and a cause. 
+ * @param msg the message + * @param cause the cause + */ + public ApertureServerException(String msg, Throwable cause) { + super(msg,cause); + } +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerException.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateEvent.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateEvent.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateEvent.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + + + +public class ApertureServerStateEvent { + STATE state; + + public ApertureServerStateEvent(STATE state) { + this.state = state; + } + + public STATE getState() { + return state; + } +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateEvent.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateListener.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateListener.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateListener.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + + +/** + * An interface that should be implemented by any object that would like + * to receive notifications about changing states of {@link ApertureServer}. + * + * Listener is notified whenever new data wraper is STARTED or STOPPED + * + * @author Tomasz Trela + * + */ + +public interface ApertureServerStateListener { + /** + * The method that will be called when a data {@link ApertureServerStateEvent} occurs. + * @param event The event instance. 
+ */ + public void apertureServerStateChanged(ApertureServerStateEvent event); +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServerStateListener.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/CrawlerStateBean.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/CrawlerStateBean.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/CrawlerStateBean.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,250 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +import java.io.Serializable; + +/** + * The progress of the crawling process + * @author antheque + * + */ +public class CrawlerStateBean implements Serializable, Cloneable { + + /** The serial version UID */ + private static final long serialVersionUID = -1463440649373959205L; + + //////////////////////////////////////////////////////////////////////// + ////////////////////////// PRIVATE FIELDS ////////////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** The time of last completed crawl, or <= 0 if there was none */ + private long lastCrawled; + /** The time when the last crawl began, or <= 0 if there was none */ + private long lastCrawlBegin; + /** The current state of the crawler */ + private STATE state; + /** New objects found on the current crawl */ + private int newObjects; + /** Objects recognized as modified during the current crawl */ + private int modifiedObjects; + /** Objects not found on the current crawl - recognized as deleted */ + private int deletedObjects; + /** Objects recognized as unmodified on the current crawl */ + private int unmodifiedObjects; + /** Last error message */ + private String lastErrorMessage; + + //////////////////////////////////////////////////////////////////////// + //////////////////////////// CONSTRUCTOR /////////////////////////////// + 
//////////////////////////////////////////////////////////////////////// + + /** The default constructor. Initializes the state field to IDLE */ + public CrawlerStateBean() { + state = STATE.IDLE; + } + + //////////////////////////////////////////////////////////////////////// + //////////////////////// GETTERS AND SETTERS /////////////////////////// + //////////////////////////////////////////////////////////////////////// + + /** + * Return the current state of crawling. This returns a float [0,1] which + * is a guesstimate of the completion of the full crawl. Nice for GUI + * progress bars etc! + * + * @return [0,1] indicator of crawling process. + */ + public float getProgress() { + return 0f; + } + + /** + * Returns the lastCrawled. + * + * @return the lastCrawled. This value is <= 0 if the crawler hasn't + * finished yet. + */ + public long getLastCrawled() { + return lastCrawled; + } + /** + * Sets the new value of the lastCrawled field. + * + * @param lastCrawled the lastCrawled to set. + */ + public void setLastCrawled(long lastCrawled) { + this.lastCrawled = lastCrawled; + } + /** + * Returns the delObjects. + * + * @return the delObjects. + */ + public int getDeletedObjects() { + return deletedObjects; + } + /** + * Sets the new value of the delObjects field. + * + * @param delObjects the delObjects to set. + */ + public void setDeletedObjects(int delObjects) { + this.deletedObjects = delObjects; + } + /** + * Returns the lastErrorMessage. + * + * @return the lastErrorMessage. + */ + public String getLastErrorMessage() { + return lastErrorMessage; + } + /** + * Sets the new value of the lastErrorMessage field. + * + * @param lastErrorMessage the lastErrorMessage to set. + */ + public void setLastErrorMessage(String lastErrorMessage) { + this.lastErrorMessage = lastErrorMessage; + } + /** + * Returns the modObjects. + * + * @return the modObjects. 
+ */ + public int getModifiedObjects() { + return modifiedObjects; + } + /** + * Sets the new value of the modObjects field. + * + * @param modObjects the modObjects to set. + */ + public void setModifiedObjects(int modObjects) { + this.modifiedObjects = modObjects; + } + /** + * Returns the newObjects. + * + * @return the newObjects. + */ + public int getNewObjects() { + return newObjects; + } + + /** + * Sets the unmodified objects. + * @param unmodObjects + */ + public void setUnmodifiedObjects(int unmodObjects) { + this.unmodifiedObjects = unmodObjects; + } + + /** + * @return The number of unmodified objects. + */ + public int getUnmodifiedObjects() { + return this.unmodifiedObjects; + } + + /** + * Sets the new value of the newObjects field. + * + * @param newObjects the newObjects to set. + */ + public void setNewObjects(int newObjects) { + this.newObjects = newObjects; + } + /** + * Returns the state. + * + * @return the state. + */ + public STATE getState() { + if (lastErrorMessage == null) { + return state; + } else { + if (state.equals(STATE.STOPPING) || state.equals(STATE.ERROR)){ + return state; + } else if (state.equals(STATE.STOPPED)) { + return STATE.STOPPED_WITH_ERRORS; + } else if (state.equals(STATE.CRAWLING)) { + return STATE.CRAWLING_WITH_ERRORS; + } else if (state.equals(STATE.DONE)) { + return STATE.DONE_WITH_ERRORS; + } else if (state.equals(STATE.IDLE)) { + return STATE.ERROR; + } else { + return state; + } + } + } + /** + * Sets the new value of the state field. + * + * @param state the state to set. + */ + public void setState(STATE state) { + this.state = state; + } + + /** + * @return a copy of this crawler state bean. + */ + public CrawlerStateBean cloneCrawlerStateBean() { + try { + return (CrawlerStateBean)clone(); + } catch (CloneNotSupportedException cnle) { + throw new RuntimeException(cnle); + } + } + + /** + * Returns the lastCrawlBegin. + * + * @return the lastCrawlBegin. 
+ */ + public long getLastCrawlBegin() { + return lastCrawlBegin; + } + + /** + * Sets the new value of the lastCrawlBegin field. + * + * @param lastCrawlBegin the lastCrawlBegin to set. + */ + public void setLastCrawlBegin(long lastCrawlBegin) { + this.lastCrawlBegin = lastCrawlBegin; + } +} \ No newline at end of file Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/CrawlerStateBean.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangeListener.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangeListener.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangeListener.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +/** + * An interface that should be implemented by any object that would like + * to receive notifications about changing states of data source configuration. + * + * Listener is notified whenever new data source is created, or removed old one. + * + * @author Tomasz Trela + * + */ +public interface DataSourceChangeListener { + /** + * The method that will be called when a data sources configuration change occurs. + * @param event The event instance. 
+ */ + public void dataSourceChanged(DataSourceChangedEvent event); +} \ No newline at end of file Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangeListener.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangedEvent.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangedEvent.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangedEvent.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +public class DataSourceChangedEvent { + private String dataSourceURIString; + + public DataSourceChangedEvent(String dataSourceURIString) { + this.dataSourceURIString = dataSourceURIString; + } + + public String getDataSourceURIString() { + return dataSourceURIString; + } + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceChangedEvent.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ResourceBean.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ResourceBean.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ResourceBean.java 2010-01-19 11:34:07 UTC (rev 2212) @@ -0,0 +1,313 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server; + +import java.io.Serializable; + +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.vocabulary.NAO; + + +/** + * This class represents an objects, such as a NIE:InformationElement, + * a PIMO:Thing, a NAO:Tag, an RDFS:Resource, or any other object that + * can be identified by an URI in general. + * It is based on the modeling of RDF, hence the bean properties are + * named after RDF elements (label, class, uri). + * This class is intended to be subclassed to be extended with more + * values, if needed. 
+ * + * <h2 id="labeling">Displaying ResourceBeans</h2> + * The label of a resourcebean is also the string representation, + * the {@link #toString()} method returns the same result as + * {@link #getLabel()}. + * + * <h2 id="equality">Equality and HashCode</h2> + * Two resourcebeans are considered equal when their URI is equal, + * even when other values differ. + * Java requires that equality and hashcode are implemented the same, + * hence the returned hashcode for this object is the hashcode of the + * URI (or the object hash-code, if no URI is set). + * + * <h2 id="sorting">Sorting</h2> + * ResourceBeans are sorted by the value of {@link #getLabel()}, + * if no labels are set, by {@link #getUri()}, else they are considered equal. + * + * <h2>The ResourceBeanInterface VS the ResourceBean</h2> + * ResourceBean has a fitting interface, allowing other classes to implement it. + * All methods inside the Services package <b>should</b> use concrete classes in the + * signatures of the methods used in the interface. + * The reason to use beans instead of interfaces is serialization: + * in RPC calls, it is more straightforward to serialize beans than to serialize interfaces. + * + * <h2>ResourceBean in other frameworks</h2> + * This ResourceBean class is equivalent (in terms of intended use) to RDFReactor's + * org.ontoware.rdfreactor.schema.rdfs.Resource or GnoGno's + * org.gnogno.gui.GnoResource. + * @author sauermann + * + */ +public class ResourceBean implements Comparable<ResourceBean>, Serializable { + + + /** + * @see #getUri() + */ + String uri; + + /** + * @see #getLabel() + */ + String label; + + /** + * @see #getRdfsClass() + */ + ResourceBean rdfsClass; + + /** + * @see #getComment() + */ + String comment; + + /** + * This constructor is only to be used when deserializing a ResourceBean + * by RPC frameworks. + */ + public ResourceBean() { + + } + + /** + * Create a ResourceBean. 
+ * @param uri the URI + */ + public ResourceBean(String uri) { + this.uri = uri; + } + + /** + * Create a ResourceBean + */ + public ResourceBean(String uri, String label) { + super(); + this.uri = uri; + this.label = label; + } + /** + * Create a ResourceBean + */ + public ResourceBean(String uri, String label, String comment) { + super(); + this.uri = uri; + this.label = label; + this.comment = comment; + } + /** + * Create a ResourceBean + */ + public ResourceBean(String uri, String label, String comment, + ResourceBean rdfsClass) { + super(); + this.uri = uri; + this.label = label; + this.comment = comment; + this.rdfsClass = rdfsClass; + } + + /** + * Create a ResourceBean from an RDF2Go URI + * @param uri + */ + public ResourceBean(URI uri) { + this.uri = uri.toString(); + } + + /** + * see above at <a href="#sorting">sorting</a> + */ + public int compareTo(ResourceBean o) { + if ((label != null) && (o.label != null)) + return label.compareTo(o.label); + if ((uri != null) && (o.uri != null)) + return uri.compareTo(o.uri); + return 0; + } + + /** + * see above at <a href="#equality">Equality</a> + */ + @Override + public boolean equals(Object obj) { + if (obj instanceof ResourceBean) + { + ResourceBean other = (ResourceBean)obj; + String otheruri = (other.getUri()); + if ((otheruri != null) && (uri != null)) + return uri.equals(otheruri); + } + return super.equals(obj); + } + + /** + * The comment is intended to be used for texts that + * explain more about the resource. + * Values can be the rdfs:comment of this resource, the nao:description, + * or other exp... [truncated message content] |
From: <leo...@us...> - 2010-02-09 19:51:35
|
Revision: 2219 http://aperture.svn.sourceforge.net/aperture/?rev=2219&view=rev Author: leo_sauermann Date: 2010-02-09 19:51:10 +0000 (Tue, 09 Feb 2010) Log Message: ----------- - made the aperture server gui work. - added a crawler handler for Drupal's Service XMLRPC api - played around with it. it often works! Modified Paths: -------------- aperture-webserver/trunk/README.txt aperture-webserver/trunk/WebContent/WEB-INF/web.xml aperture-webserver/trunk/WebContent/index.jsp aperture-webserver/trunk/WebContent/style.css aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java Added Paths: ----------- aperture-webserver/trunk/WebContent/WEB-INF/aperture.tld aperture-webserver/trunk/WebContent/WEB-INF/lib/jstl.jar aperture-webserver/trunk/WebContent/WEB-INF/lib/standard.jar aperture-webserver/trunk/WebContent/WEB-INF/lib/ws-commons-util-1.0.2.jar aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-client-3.1.2.jar aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-common-3.1.2.jar aperture-webserver/trunk/WebContent/config/ aperture-webserver/trunk/WebContent/config/accessResource.jsp aperture-webserver/trunk/WebContent/config/error.jsp aperture-webserver/trunk/WebContent/config/index.jsp aperture-webserver/trunk/WebContent/config/newsource.jsp aperture-webserver/trunk/WebContent/config/nowrapper.jsp aperture-webserver/trunk/WebContent/config/sourcedetails.jsp aperture-webserver/trunk/WebContent/config/style.css aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalServiceCommands.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceClassBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/FresnelEditor.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/CheckBoxOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/ComboBoxOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/ConfigurationOptionBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/ConfigurationOptionType.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/IntegerFieldOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/MultipleTextFieldOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/PasswordFieldOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/option/TextFieldOption.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceDAOFactory.java 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceInstanceDAO.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceOntologyDAO.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/exception/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/exception/DataWrapperUIException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/exception/ValidationFailedException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/AccessResourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/CrawlControlServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ModifySourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/OpenResourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RefreshResourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RemoveSourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/tag/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/tag/CrawlButtonTag.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/tag/StateTableCellTag.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/util/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/util/ModelUtil.java Modified: aperture-webserver/trunk/README.txt =================================================================== --- aperture-webserver/trunk/README.txt 2010-02-03 20:28:28 UTC (rev 2218) +++ aperture-webserver/trunk/README.txt 2010-02-09 19:51:10 UTC (rev 2219) @@ -27,6 +27,7 @@ - get the 
latest aperture release: aperture 1.4.0 - copy all aperture JARs into the WebContent/WEB-INF/lib folder (use the aperture-runtime-onejar-1.4.0 version) the needed files are listed in the svn:ignore property of the lib folder. +- We use Servlets v2.4, JSP 2.0, and JSTL v1.1. Building Releases ================= Added: aperture-webserver/trunk/WebContent/WEB-INF/aperture.tld =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/aperture.tld (rev 0) +++ aperture-webserver/trunk/WebContent/WEB-INF/aperture.tld 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<!DOCTYPE taglib + PUBLIC "-//Sun Microsystems, Inc.//DTD JSP Tag Library 1.2//EN" + "http://java.sun.com/j2ee/dtd/web-jsptaglibrary_1_2.dtd"> +<taglib> + <tlib-version>1.0</tlib-version> + <jsp-version>2.0</jsp-version> + <short-name>aperture</short-name> + <uri>http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib</uri> + <tag> + <name>statuscell</name> + <tag-class>org.semanticdesktop.aperture.server.configui.tag.StateTableCellTag</tag-class> + <attribute> + <name>state</name> + <required>false</required> + <rtexprvalue>true</rtexprvalue> + </attribute> + </tag> + <tag> + <name>crawlbutton</name> + <tag-class>org.semanticdesktop.aperture.server.configui.tag.CrawlButtonTag</tag-class> + <attribute> + <name>bean</name> + <required>false</required> + <rtexprvalue>true</rtexprvalue> + </attribute> + </tag> +</taglib> Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/jstl.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/jstl.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/standard.jar =================================================================== 
(Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/standard.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/ws-commons-util-1.0.2.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/ws-commons-util-1.0.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-client-3.1.2.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-client-3.1.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-common-3.1.2.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-common-3.1.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: aperture-webserver/trunk/WebContent/WEB-INF/web.xml =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-02-03 20:28:28 UTC (rev 2218) +++ aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-02-09 19:51:10 UTC (rev 2219) @@ -22,6 +22,60 @@ org.semanticdesktop.aperture.servlet.ApertureServerServlet</servlet-class> <load-on-startup>1</load-on-startup> </servlet> + <servlet> + <description> + </description> + <display-name> + CrawlControlServlet</display-name> + <servlet-name>CrawlControlServlet</servlet-name> + <servlet-class> + 
org.semanticdesktop.aperture.server.configui.servlet.CrawlControlServlet</servlet-class> + </servlet> + <servlet> + <description> + </description> + <display-name> + ModifySourceServlet</display-name> + <servlet-name>ModifySourceServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.server.configui.servlet.ModifySourceServlet</servlet-class> + </servlet> + <servlet> + <description> + </description> + <display-name> + RemoveSourceServlet</display-name> + <servlet-name>RemoveSourceServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.server.configui.servlet.RemoveSourceServlet</servlet-class> + </servlet> + <servlet> + <description> + </description> + <display-name> + OpenResourceServlet</display-name> + <servlet-name>OpenResourceServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.server.configui.servlet.OpenResourceServlet</servlet-class> + </servlet> + <servlet> + <description> + </description> + <display-name> + AccessResourceServlet</display-name> + <servlet-name>AccessResourceServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.server.configui.servlet.AccessResourceServlet</servlet-class> + </servlet> + <servlet> + <description> + </description> + <display-name> + RefreshResourceServlet</display-name> + <servlet-name>RefreshResourceServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.server.configui.servlet.RefreshResourceServlet</servlet-class> + </servlet> <servlet-mapping> <servlet-name>FileInspector</servlet-name> <url-pattern>/FileInspector</url-pattern> @@ -30,6 +84,30 @@ <servlet-name>ApertureServerServlet</servlet-name> <url-pattern>/ApertureServer</url-pattern> </servlet-mapping> + <servlet-mapping> + <servlet-name>CrawlControlServlet</servlet-name> + <url-pattern>/config/crawlControl</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>ModifySourceServlet</servlet-name> + <url-pattern>/config/modifySource</url-pattern> + </servlet-mapping> + 
<servlet-mapping> + <servlet-name>RemoveSourceServlet</servlet-name> + <url-pattern>/config/deleteSource</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>OpenResourceServlet</servlet-name> + <url-pattern>/config/openResource</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>AccessResourceServlet</servlet-name> + <url-pattern>/config/accessResource</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>RefreshResourceServlet</servlet-name> + <url-pattern>/config/refreshResource</url-pattern> + </servlet-mapping> <welcome-file-list> <welcome-file>index.jsp</welcome-file> </welcome-file-list> Added: aperture-webserver/trunk/WebContent/config/accessResource.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/accessResource.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/accessResource.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,26 @@ +<%@ page language="java" contentType="text/html; charset=ISO-8859-1" + pageEncoding="ISO-8859-1"%> +<%@ page import="java.io.PrintWriter" %> +<%@ page import="java.io.StringWriter" %> +<%@ page import="org.ontoware.rdf2go.model.Model" %> + +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>Insert title here</title> +</head> +<body> +<center> + The RDF/XML data extracted from the resource. 
+ <table> + <tr> + <td align="left"> + <pre><code>${response}</code></pre> + </td> + </tr> + </table> + <a href="index.jsp">Back to the index</a> +</center> +</body> +</html> \ No newline at end of file Added: aperture-webserver/trunk/WebContent/config/error.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/error.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/error.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,37 @@ +<%@ page language="java" contentType="text/html; charset=ISO-8859-1" + pageEncoding="ISO-8859-1"%> +<%@ page import="java.io.PrintWriter" %> +<%@ page import="java.io.StringWriter" %> +<%@ page import="org.ontoware.rdf2go.model.Model" %> + +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<% + Exception exception = (Exception)request.getAttribute("error"); + StringWriter stringWriter = new StringWriter(); + PrintWriter writer = new PrintWriter(stringWriter); + exception.printStackTrace(writer); + String message = stringWriter.toString(); +%> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>Insert title here</title> +</head> +<body> +<center> + A following error occured. 
+ <table> + <tr> + <td align="left"> + <pre><%= message %></pre> + </td> + </tr> + </table> + <a href="index.jsp">Back to the index</a> +</center> +</body> +</html> + +<% + session.removeAttribute("error"); +%> \ No newline at end of file Added: aperture-webserver/trunk/WebContent/config/index.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/index.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/index.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,150 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> +<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="java.util.List"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceOntologyDAO"%> +<%@page import="org.ontoware.rdf2go.model.node.URI"%> +<%@page import="java.util.LinkedList"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceClassBean"%> + +<% + DataSourceInstanceDAO dao = DataSourceDAOFactory.getInstanceDao(); + if (dao == null) { + RequestDispatcher rd = request.getRequestDispatcher("/nowrapper.jsp"); + rd.forward(request, response); + return; + } + List<DataSourceUIBean> dataSources = dao.getDataSourceList(); + request.setAttribute("sourcesList", dataSources); + + DataSourceOntologyDAO ontologyDao = DataSourceDAOFactory + .getOntologyDao(); + List<DataSourceClassBean> sourcesList = ontologyDao + .getDataSourceClassBeans(); + request.setAttribute("sourceTypes", sourcesList); +%> + +<html> +<HEAD> +<TITLE>Aperture data wrapper configuration.</TITLE> + <link title="default" rel="stylesheet" type="text/css" + href="style.css" 
media="screen" /> + +</HEAD> +<body> +<h1>Aperture data wrapper configuration</h1> + + + + +<c:choose> + <c:when test="${not empty sourcesList}"> + <h2>Currenly configured data sources.</h2> + <table> + <tr class="tableheader"> + <td>Name:</td> + <td>Type:</td> + <td>Status:</td> + <td>Last crawled:</td> + <td>Next Crawl</td> + <td colspan="3">Actions:</td> + </tr> + <c:forEach items="${sourcesList}" var="source"> + <tr> + <td>${source.name}</td> + <td>${source.typeLabel}</td> + <aperture:statuscell state="${source.crawlingState}" /> + <td>${source.lastCrawledString}</td> + <td>${source.nextCrawlString}</td> + <td> + <aperture:crawlbutton bean="${source}"/> + </td> + <td> + <form name="sourceDetailsForm" + action="sourcedetails.jsp" + method="get"> + <input type="hidden" + name="uri" + value="${source.dataSourceURIEscaped}"> + <button name="crawl" + type="submit" + value="details"> + Show Details + </button> + </form> + </td> + <td> + <form name="deleteSourceForm" + action="deleteSource" + method="post"> + <input type="hidden" + name="uri" + value="${source.dataSourceURIEscaped}"> + <button name="crawl" + type="submit" + value="delete"> + Delete + </button> + </form> + </td> + </tr> + </c:forEach> + </table> + </c:when> + <c:otherwise> + <h2>There are no data sources configured at the moment </h2> + </c:otherwise> +</c:choose> + + +<% + List<DataSourceUIBean> list + = (List<DataSourceUIBean>)request.getAttribute("sourcesList"); + for (DataSourceUIBean bean : list) { + bean.dispose(); + } + request.removeAttribute("sourcesList"); +%> + +<h2>Add new data source:</h2> +<c:forEach items="${sourceTypes}" var="bean"> + <p align="center"><a href="newsource.jsp?type=${bean.uriEscaped}">${bean.label}</a></p> +</c:forEach> + +<h2>Access resource</h2> +<form name="accessResourceForm" action="accessResource" method="post"> +<table> + <tr> + <td>Uri of the resource to access:</td> + <td> + <input name="uri" type="text" size="100" value="enter uri here" /> + </td> + <td> + 
<button name="open" type="submit" value="open">Access!</button> + </td> + </tr> +</table> +</form> +<br><br> + +<h2>Refresh resource</h2> +<form name="refreshResourceForm" action="refreshResource" method="post"> +<table> + <tr> + <td>Uri of the resource to refresh:</td> + <td> + <input name="uri" type="text" size="100" value="enter uri here" /> + </td> + <td> + <button name="open" type="submit" value="open">Refresh!</button> + </td> + + </tr> +</table> +</form> +<br><br> + +</body> +</html> Added: aperture-webserver/trunk/WebContent/config/newsource.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/newsource.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/newsource.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,28 @@ +<%@page import="java.util.logging.Logger"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> + +<% + + Logger log = Logger.getLogger(this.getClass().getName()); + // Get the type to instantiate + String type = request.getParameter("type"); + log.fine("editing new source of type: "+type); + + FresnelEditor editor = new FresnelEditor(null, null, type); + String typeLabel = DataSourceDAOFactory.getOntologyDao().getTypeLabel(type); + request.setAttribute("typeLabel",typeLabel); +%> +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<title>Add a new ${typeLabel}</title> +<link title="default" rel="stylesheet" type="text/css" + href="style/style.css" media="screen" /> +</head> +<body> +<h1>Configure new ${typeLabel} </h1> +<%= editor.createFullForm() %> +</body> +</html> \ No newline at end of file Added: aperture-webserver/trunk/WebContent/config/nowrapper.jsp =================================================================== --- 
aperture-webserver/trunk/WebContent/config/nowrapper.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/nowrapper.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,23 @@ +<%@ page language="java" contentType="text/html; charset=ISO-8859-1" + pageEncoding="ISO-8859-1"%> +<%@ page import="java.io.PrintWriter" %> +<%@ page import="java.io.StringWriter" %> +<%@ page import="org.ontoware.rdf2go.model.Model" %> + +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>No active dat wrapper has been detected.</title> +</head> +<body> +<center> + No active data wrapper has been detected. + Refresh this page when the data wrapper is activated. +</center> +</body> +</html> + +<% + session.removeAttribute("error"); +%> \ No newline at end of file Added: aperture-webserver/trunk/WebContent/config/sourcedetails.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcedetails.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,102 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> +<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> +<%@page import="java.net.URLDecoder"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> + + +<% + String uriString = (String)request.getParameter("uri"); + if (uriString == null) { + response.sendRedirect("index.jsp"); + return; + } + String uriDecoded = URLDecoder.decode(uriString,"UTF-8"); + 
DataSourceInstanceDAO dao = DataSourceDAOFactory.getInstanceDao(); + DataSourceUIBean bean = dao.getDataSourceBean(uriDecoded); + if (bean != null) { + request.setAttribute("bean",bean); + } + + FresnelEditor fresnelEditor = new FresnelEditor( + bean.getConfigurationModel(), + bean.getDataSourceURI(), + bean.getTypeUri().toString()); +%> + +<html> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> + <title>Details for ${bean.name}</title> + <link title="default" rel="stylesheet" type="text/css" + href="style.css" media="screen" /> +</head> +<body> +<% if (bean == null) { %> + <center> + <h1>Data source not found</h1> + <h2><a href="index.jsp">Back to the sources list</a></h2> + </center> +<% } else { %> + <h1>Details of '${bean.name}' data source</h1> + + <h2>Basic data source information</h2> + <%= fresnelEditor.createCommonFormPart() %> + <TR> + <TD class="rowheader">Status:</TD> + <aperture:statuscell state="${bean.crawlingState}"/> + </TR> + </table> + + <h2>Detailed data source configuration</h2> + + <%= fresnelEditor.createSpecificFormPart() %> + + <h2><aperture:crawlbutton bean="${bean}"/></h2> + + <%if (bean.getLastErrorMessage() != null) {%> + <h2>Error message</h2> + <table class="sourcedetails"> + <TR> + <TD class="errormsg"> + <pre>${bean.lastErrorMessage}</pre> + </TD> + </TR> + </table> + <%} %> + + <h2>Crawl report</h2> + <table > + <TR> + <TD class="rowheader">Crawl started:</TD> + <td class="configinput">${bean.crawlStartedString}</td> + </TR> + <TR> + <TD class="rowheader">Crawl stopped:</TD> + <td class="configinput">${bean.crawlStoppedString}</td> + </TR> + <TR> + <TD class="rowheader">New objects:</TD> + <td class="configinput">${bean.newObjects}</td> + </TR> + <TR> + <TD class="rowheader">Changed objects:</TD> + <td class="configinput">${bean.modifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Unchanged objects:</TD> + <td class="configinput">${bean.unmodifiedObjects}</td> + </TR> + <TR> + <TD 
class="rowheader">Removed objects:</TD> + <td class="configinput">${bean.deletedObjects}</td> + </TR> + </table> + + <h2><a href="index.jsp">Back to the data sources list</a></h2> +<% bean.dispose(); + } %> +</body> +</html> Added: aperture-webserver/trunk/WebContent/config/style.css =================================================================== --- aperture-webserver/trunk/WebContent/config/style.css (rev 0) +++ aperture-webserver/trunk/WebContent/config/style.css 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,116 @@ + +html { + font-family: verdana, arial, serif; + font-size: 82%; + line-height: 120%; + color: #333; +} + +body { + margin-left: 50px; + margin-right: 50px; + padding: 50px 38px 0 37px; +} + +h1 { + text-align: center; + font-size: 200%; +} + +h2 { + margin: 20px 0 15px 0; + padding: 0; + text-align: center; + font-size: 130%; +} + +.center { + text-align: center; +} + +button { + width:130px +} + +button.center { + margin-left: auto; + margin-right:auto; +} + +table { + margin-left: auto; + margin-right: auto; +} + +table.sourcedetails { + width: 590px +} + +.comment { + color: gray; + font-size: xx-small; +} + +tr.tableheader { + font-weight:bold; + text-align:center; +} + +td.rowheader { + width: 250px; + font-weight:bold; +} + +td.configinput { + text-align: left; + width: 400px; +} + +td.status { + padding-left: 50px; + padding-right: 50px; + text-align:center; +} + +.idle { + background-color: gray; +} + +.error { + background-color: red; + font-weight: bold; +} + +.crawling { + background-color: green; +} + +.warning { + background-color: yellow; +} + +.errormsg { + text-align: left; + font-size: 80%; +} + + +.configlabel { + text-align: left; + vertical-align: top; +} + +a:link { + text-decoration: none; + color: #CC0000; +} + +a:visited { + text-decoration: none; + color: #CC6666; +} + +a:hover { + text-decoration: underline; + color: #CC0000; +} \ No newline at end of file Modified: aperture-webserver/trunk/WebContent/index.jsp 
=================================================================== --- aperture-webserver/trunk/WebContent/index.jsp 2010-02-03 20:28:28 UTC (rev 2218) +++ aperture-webserver/trunk/WebContent/index.jsp 2010-02-09 19:51:10 UTC (rev 2219) @@ -15,29 +15,27 @@ <title>Aperture Document Inspection</title> </head> <body> - -<h1>Aperture Document Inspection</h1> - +<h2>Aperture Document Inspection</h2> <div> Wecome to the Aperture Document Inspection. Please send an URL or a document and we will return the content in rdf/xml. </div> - -<div class="fileupload"> -<p>Please chose a document.</p> +<div class="uploadform"> +<p>Upload a document</p> <form action="FileInspector" method="post" enctype="multipart/form-data"> <input type="file" name="name" /> <input type="submit" name="upload" value="upload" /> </form> </div> -<div class="htmlupload"> -<p>Please type in the URL of a web site.</p> +<div class="uploadform"> +<p>Let aperture retrieve a document from an URL</p> <form action="FileInspector" method="post"> <input type="text" name="url" /> <input type="submit" name="upload" value="upload" /> </form> </div> - +<h2>Crawling Server</h2> +<div><a href="config/index.jsp">Configure Aperture Crawling Server</a></div> </body> </html> </jsp:root> \ No newline at end of file Modified: aperture-webserver/trunk/WebContent/style.css =================================================================== --- aperture-webserver/trunk/WebContent/style.css 2010-02-03 20:28:28 UTC (rev 2218) +++ aperture-webserver/trunk/WebContent/style.css 2010-02-09 19:51:10 UTC (rev 2219) @@ -1,6 +1,6 @@ /* basic elements */ body { - font: times; + font-family: verdana, arial; background: #fffff1; text-align: center; } @@ -8,25 +8,16 @@ h1 { font: bold 16pt; letter-spacing: 1px; - margin-bottom: 100pt; text-align: center; } -.fileupload { +.uploadform { display: block; background-color: #ffffff; width: 80%; border-style: solid; - text-align: center; + text-align: left; margin: 20pt; -} - -.htmlupload { - 
display: block; - background-color: #ffffff; - width: 80%; - border-style: solid; - text-align: center; - margin: 20pt; -} + padding: 10pt; +} \ No newline at end of file Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,214 @@ +/** + * + */ +package org.semanticdesktop.aperture.drupalhandler; + +import java.util.Date; +import java.util.Set; + +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.crawler.Crawler; +import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierFactory; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierRegistry; +import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.server.impl.ApertureRegistriesAggregate; +import org.semanticdesktop.aperture.vocabulary.NIE; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A crawler handler that speaks to a drupal server via XML-RPC. 
+ * @author sauermann + * + */ +public class DrupalCrawlerHandler extends CrawlerHandlerBase { + + private static Logger logger = LoggerFactory.getLogger(DrupalCrawlerHandler.class); + + /** + * Testing parameters + * @param args + */ + public static void main(String[] args) { + DrupalXmlRpcService service = new DrupalXmlRpcService( + "localhost", + "dac5b06a2e63eed9336ce24f5e56a181", + "http://localhost/organikdrupal/?q=services/xmlrpc"); + if(service.connect() == true) + { + service.login("root", "root"); + try { + service.testFileSave(); + DrupalNode node = new DrupalNode(); + node.setType(DrupalNode.TYPE_STORY); + node.setTitle("HEllo WORLD"); + node.setBody("at "+new Date().toGMTString()); + service.nodeSave(node); + } catch (Exception e) { + System.out.println(e.toString()); + } + + service.logout(); + } + + } + + /** + * Domain needed to connect to drupal + */ + String domain; + + /** + * API Key + */ + String apikey; + + /** + * xmlrpc host service URL. + * example: http://localhost/organikdrupal/?q=services/xmlrpc + * + */ + String serviceUrl; + + /** + * username for auth + */ + String username; + + /** + * password for auth + */ + String password; + + /** + * The service, it is initialized in connect() and closed in dispose() + */ + DrupalXmlRpcService service; + + + public DrupalCrawlerHandler(ApertureRegistriesAggregate registries) { + super( + // TODO - use something cleverer here + new MagicMimeTypeIdentifier(), + registries.getExtractorRegistry(), null); + + this.domain = "localhost"; + this.apikey = "dac5b06a2e63eed9336ce24f5e56a181"; + this.serviceUrl = "http://localhost/organikdrupal/?q=services/xmlrpc"; + this.username = "root"; + this.password = "root"; + setExtractingContents(true); + } + + private static MimeTypeIdentifier getMimeTypeIdentifier( + ApertureRegistriesAggregate apertureRegistries) { + MimeTypeIdentifierRegistry registry + = apertureRegistries.getMimeTypeIdentifierRegistry(); + Set set = registry.getAll(); + if (!set.isEmpty()) { 
+ MimeTypeIdentifierFactory factory + = (MimeTypeIdentifierFactory) set.iterator().next(); + return factory.get(); + } else { + logger.warn("No mime type identifier found. " + + "No full-text extraction possible"); + return new MagicMimeTypeIdentifier(); + } + } + + + + public DrupalCrawlerHandler(String domain, String apikey, + String serviceUrl, String username, String password) { + super(); + this.domain = domain; + this.apikey = apikey; + this.serviceUrl = serviceUrl; + this.username = username; + this.password = password; + setExtractingContents(true); + } + + public void connect() throws Exception { + service = new DrupalXmlRpcService(domain, apikey, serviceUrl); + if(!service.connect()) + throw new Exception("Cannot connect to '"+serviceUrl+"'"); + + service.login(username, password); + } + + public void dispose() { + if (service != null) { + service.logout(); + service = null; + } + } + + @Override + public void objectChanged(Crawler crawler, DataObject object) { + try { + super.processBinary(crawler, object); + } catch (Exception e) { + logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + } + + // push to drupal + try { + // connect + if (!connected()) + connect(); + + // convert + RDFContainer data = object.getMetadata(); + DrupalNode node = new DrupalNode(); + node.setType(DrupalNode.TYPE_STORY); + node.setTitle(data.getString(NIE.title)); + node.setBody(data.getString(NIE.plainTextContent)); + service.nodeSave(node); + } catch (Exception x) { + logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); + } + + super.objectChanged(crawler, object); + } + + @Override + public void objectNew(Crawler crawler, DataObject object) { + try { + super.processBinary(crawler, object); + } catch (Exception e) { + logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + } + + // push to drupal + try { + // connect + if (!connected()) + connect(); + + // convert + RDFContainer data = object.getMetadata(); 
+ DrupalNode node = new DrupalNode(); + node.setType(DrupalNode.TYPE_STORY); + node.setTitle(data.getString(NIE.title)); + node.setBody(data.getString(NIE.plainTextContent)); + service.nodeSave(node); + } catch (Exception x) { + logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); + } + super.objectNew(crawler, object); + } + + private boolean connected() { + return service != null; + } + + @Override + public void objectRemoved(Crawler crawler, String url) { + super.objectRemoved(crawler, url); + } + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,119 @@ +/** + * + */ +package org.semanticdesktop.aperture.drupalhandler; + +import java.util.HashMap; +import java.util.Map; + +/** + * A drupal Node. + * It sets and gets all its variables as MAP entries. 
+ * @author sauermann + * + */ +public class DrupalNode extends HashMap<String, Object> { + + /** + * types + */ + public static final String TYPE_STORY = "story"; + + public static String NID = "nid"; + public static String TYPE = "type"; + public static String LANGUAGE = "language"; + public static String UID = "uid"; + public static String STATUS = "status"; + public static String CREATED = "created"; + public static String CHANGED = "changed"; + public static String TITLE = "title"; + public static String BODY = "body"; +/* more for future + * public static String NID = "comment"; + public static String NID = "promote"; + public static String NID = "moderate"; + public static String NID = "sticky"; + public static String NID = "tnid"; + public static String NID = "translate"; + public static String NID = "vid"; + public static String NID = "revision_uid"; + public static String NID = "teaser"; + public static String NID = "log"; + public static String NID = "revision_timestamp"; + public static String NID = "format"; + public static String NID = "name"; + public static String NID = "picture"; + public static String NID = "data"; + public static String NID = "rdf"; + public static String NID = "last_comment_timestamp"; + public static String NID = "last_comment_name"; + public static String NID = "comment_count"; + public static String NID = "taxonomy"; + public static String NID = "build_mode"; + public static String NID = "readmore"; + public static String NID = "content"; + */ + + public DrupalNode() { + super(); + } + + public DrupalNode(int initialCapacity, float loadFactor) { + super(initialCapacity, loadFactor); + } + + public DrupalNode(int initialCapacity) { + super(initialCapacity); + } + + public DrupalNode(Map m) { + super(m); + } + + public long getNid() { + Object nid = get(NID); + if (nid == null) + throw new RuntimeException("nid not set (null)"); + if (nid instanceof String) { + long result = Long.parseLong((String)nid); + put(NID, result); // 
replace, for future + return result; + } else + return (Long)nid; + } + + public void setNid(long nid) { + put(NID, nid); + } + + public String getTitle() { + Object o = get(TITLE); + if (o==null) + return null; + else + return o.toString(); + } + + public void setTitle(String o) { + put(TITLE, o); + } + + public String getBody() { + Object o = get(BODY); + if (o==null) + return null; + else + return o.toString(); + } + + public void setBody(String o) { + put(BODY, o); + } + + public void setType(String o) { + put(TYPE, o); + } + + + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalServiceCommands.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalServiceCommands.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalServiceCommands.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,47 @@ +package org.semanticdesktop.aperture.drupalhandler; + + + +/** + * see http://www.molinesoftware.com/?q=content/drupalservicecommands + * @author Kane + */ + public enum DrupalServiceCommands { + NodeSave { + @Override + public String toString() { + return "node.save"; + } + }, + SystemConnect { + @Override + public String toString() { + return "system.connect"; + } + }, + UserLogout { + @Override + public String toString() { + return "user.logout"; + } + }, + UserLogin { + @Override + public String toString() { + return "user.login"; + } + }, + FileSave { + @Override + public String toString() { + return "file.save"; + } + }, + NewComments { + @Override + public String toString() { + return "test.count"; + } + } + + } \ No newline at 
end of file Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalServiceCommands.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,274 @@ +package org.semanticdesktop.aperture.drupalhandler; + + + + +import java.security.InvalidKeyException; +import java.security.NoSuchAlgorithmException; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.crypto.spec.SecretKeySpec; +import javax.crypto.Mac; +import java.nio.charset.Charset; +import com.sun.net.ssl.internal.ssl.Debug; +import java.net.URL; +import java.util.HashMap; +import java.util.Vector; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; + +/** +* see http://drupal.org/node/632844 +* @author Aaron Moline <Aar...@mo...> +*/ +public class DrupalXmlRpcService { + + // <editor-fold desc="Public Properties"> + public String ServiceURL; + public String ServiceDomain; + public String ApiKey; + public String Nonce; + public long TimeStamp; + public String APIHash; + public XmlRpcClient XmlService; + public boolean APIKey_Active; + // </editor-fold> + + // <editor-fold desc=" Constructors "> + public DrupalXmlRpcService(String serviceDomain, String apiKey, String serviceURL,boolean api_active) { + this(serviceDomain, apiKey, serviceURL); + this.APIKey_Active = api_active; + + } + + public DrupalXmlRpcService(String serviceDomain, String apiKey, String 
serviceURL) { + this.ServiceDomain = serviceDomain; + this.ApiKey = apiKey; + this.ServiceURL = serviceURL; + this.APIKey_Active = true; + } + // </editor-fold> + + // <editor-fold defaultstate="collapsed" desc="Private Properties"> + private String SessionID; + // </editor-fold> + + // <editor-fold defaultstate="collapsed" desc="Private Methods"> + private String GetNonce()/*(int length)*/ { + /* + * //TODO:Get None Generator Working + String allowedCharacters = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; + StringBuilder password = new StringBuilder(); + + Random rand = new Random(); + for (int i = 0; i < length; i++) + { + password.append() + //password.append(append); + } + + return password.toString(); + * */ + return ""+System.currentTimeMillis(); + } + /* + * + * @drupalServiceCommand + * + */ + private void intializeHash(DrupalServiceCommands drupalServiceCommand) { + this.Nonce = GetNonce(); + this.TimeStamp = System.currentTimeMillis(); + String hashstring = GetHashString(drupalServiceCommand.toString()); + this.APIHash = GetHMAC(hashstring); + } + + private void IntializeService() { + try { + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + config.setServerURL(new URL(this.ServiceURL)); + XmlService = new XmlRpcClient(); + XmlService.setConfig(config); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + private String GetHashString(String serviccmd) { + StringBuilder sb = new StringBuilder(); + sb.append(""+System.currentTimeMillis()); // Time stamp + sb.append(";"); + sb.append(this.ServiceDomain);//Service Domain + sb.append(";"); + sb.append(""+System.currentTimeMillis()); //Nonce + sb.append(";"); + sb.append(serviccmd); //Service command + Debug.println("Created GetHashString: ",sb.toString()); + return sb.toString(); + } + + private Vector GetDefaultParams() { + Vector params = new Vector(); + //Drupal is setup to use Service Keys, then add the following. 
+ if(this.APIKey_Active) + { + params.add(this.APIHash); + params.add(this.ServiceDomain); + params.add(""+this.TimeStamp); + params.add(this.Nonce); + params.add(this.SessionID); + } + return params; + } + // </editor-fold> + + // <editor-fold defaultstate="collapsed" desc="Public Methods "> + public String GetHMAC(String message) { + + Mac mac; + try { + Charset csets = Charset.forName("US-ASCII"); + SecretKeySpec keySpec = new javax.crypto.spec.SecretKeySpec(csets.encode(this.ApiKey).array(), "HmacSHA256"); + mac = javax.crypto.Mac.getInstance("HmacSHA256"); + mac.init(keySpec); + byte[] hash = mac.doFinal(csets.encode(message).array()); + + String result = ""; + for (int i = 0; i < hash.length; i++) { + result += Integer.toString((hash[i] & 0xff) + 0x100, 16).substring(1); + } + + Debug.println("Created HMAC: ", result); + return result; + + } catch (InvalidKeyException ex) { + Logger.getLogger(DrupalXmlRpcService.class.getName()).log(Level.SEVERE, null, ex); + throw new RuntimeException(ex); + } catch (NoSuchAlgorithmException ex) { + Logger.getLogger(DrupalXmlRpcService.class.getName()).log(Level.SEVERE, null, ex); + throw new RuntimeException(ex); + } + + + } + + public boolean connect() { + try { + + //Intialize Hash + intializeHash(DrupalServiceCommands.SystemConnect); + + //Intialize Service + IntializeService(); + + Debug.println("XmlService Service Intialized"," "); + + HashMap map = (HashMap)XmlService.execute(DrupalServiceCommands.SystemConnect.toString(), new Object[]{}); + + this.SessionID = (String)map.get("sessid"); + + Debug.println("Conn SessionID: ", this.SessionID); + + return true; + + } catch (Exception e) { + e.printStackTrace(); + return false; + } + } + + public boolean login(String username, String password) { + try { + intializeHash(DrupalServiceCommands.UserLogin); + + Vector params = GetDefaultParams(); + //Add Login Paramaters + params.add(username); + params.add(password); + + HashMap o = 
(HashMap)XmlService.execute(DrupalServiceCommands.UserLogin.toString(), params); + if(!o.isEmpty()) { + if(o.containsKey(username) && o.containsKey(password)) { + //confirmLogin(HashMap loginValue, String username, String password); + } + } + this.SessionID = (String)o.get("sessid"); + + Debug.println("Successfull Login:", o.toString()); + return true; + } catch (Exception e) { + e.printStackTrace(); + } + return false; + } + /* + public void Login(String username, String password) { + try { + IntializeHash(DrupalServiceCommands.UserLogin); + + + Vector params = GetDefaultParams(); + //Add Login Paramaters + params.add(username); + params.add(password); + + HashMap o = (HashMap)XmlService.execute(DrupalServiceCommands.UserLogin.toString(), params); + + this.SessionID = (String)o.get("sessid"); + + Debug.println("Successfull Login:", o.toString()); + + } catch (Exception e) { + System.out.println(e.toString()); + } + } +*/ + + public boolean logout() { + + try { + intializeHash(DrupalServiceCommands.UserLogout); + Vector params = GetDefaultParams(); + params.add(this.SessionID); + Object o = XmlService.execute(DrupalServiceCommands.UserLogout.toString(), params); + + Debug.println("Logout Sucessfull:",o.toString()); + return true; + } catch (Exception e) { + System.out.println(e.toString()); + return false; + } + } + + //Testing New Things Not valid + public void testFileSave() { + try { + + intializeHash(DrupalServiceCommands.FileSave); + byte[] Filebyte = new byte[10]; + Vector params = GetDefaultParams(); + params.add(Filebyte); + Object o = XmlService.execute(DrupalServiceCommands.FileSave.toString(), params); + Debug.println("Test Sucessfull:", o.toString()); + } catch (Exception e) { + System.out.println(e.toString()); + } + } + + public Object nodeSave(DrupalNode node) throws Exception { + try { + intializeHash(DrupalServiceCommands.NodeSave); + Vector params = GetDefaultParams(); + params.add(node); + Object o = 
XmlService.execute(DrupalServiceCommands.NodeSave.toString(), params); + Debug.println("Test Sucessfull:", o.toString()); + return o; + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + + } +} \ No newline at end of file Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceClassBean.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceClassBean.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceClassBean.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,120 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **/ +package org.semanticdesktop.aperture.server.configui.bean; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.logging.Logger; + +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.util.RDFTool; +import org.ontoware.rdf2go.vocabulary.RDFS; +import org.semanticdesktop.aperture.datasource.DataSourceFactory; + +/** + * Encapsulates basic information about a data source class and + * the accompanying factory for it. + * @author <a href="mailto:Ant...@df...">Antoni Mylka</a> + */ +public class DataSourceClassBean { + + /** Logger */ + public static Logger log + = Logger.getLogger(DataSourceClassBean.class.getName()); + + final private DataSourceFactory factory; + + final private URI uri; + + private String label; + + /** + * Construct a DataSourceClassBean from the factory. + * Reads the label and URI of the datasource from the passed factory. 
+ * @param factory + */ + public DataSourceClassBean(DataSourceFactory factory) { + this.factory=factory; + this.uri = factory.getSupportedType(); + // read the label + Model m = RDF2Go.getModelFactory().createModel(); + try { + m.open(); + if (!factory.getDescription(m)) + log.finer("Datasource: cannot get RDF description from datasource factory "+factory.getClass()+ "("+uri+")"); + this.label = RDFTool.getSingleValueString(m, uri, RDFS.label); + if (label == null) + { + log.finer("Datasource description is missing label, parsing URI as alternative"); + label = RDFTool.getLabel(uri); + } + } finally { + m.close(); + } + } + + + /** + * Returns the label. + * + * @return the label. + */ + public String getLabel() { + return label; + } + + /** + * Returns the uri. + * + * @return the uri. + */ + public URI getUri() { + return uri; + } + + /** + * Returns the uri. + * + * @return the uri. + */ + public String getUriEscaped() { + try { + return URLEncoder.encode(uri.toString(), "UTF-8"); + } catch (UnsupportedEncodingException e) { + return uri.toString(); + } + } + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceClassBean.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java 2010-02-09 19:51:10 UTC (rev 2219) @@ -0,0 +1,393 @@ +/** + * Copyright (c) 2006-2009, NEPOMUK Consortium + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the NEPOMUK Consortium nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **/ +package org.semanticdesktop.aperture.server.configui.bean; + +import java.io.IOException; +import java.io.StringReader; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.text.DateFormat; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.exception.ModelRuntimeException; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.vocabulary.RDF; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.server.CrawlerStateBean; +import org.semanticdesktop.aperture.server.STATE; +import org.semanticdesktop.aperture.server.configui.dao.DataSourceOntologyDAO; +import org.semanticdesktop.aperture.server.configui.exception.DataWrapperUIException; +import org.semanticdesktop.aperture.vocabulary.DATASOURCE; + +/** + * Transfers the data source information between the business and the + * presentation layer. + * + * @author <a href="mailto:Ant...@df...>Antoni Mylka</a> + */ +public class DataSourceUIBean { + + //////////////////////////////////////////////////////////////////////// + /////////////////////////// PRIVATE FIELDS ///////////////////////////... [truncated message content] |
From: <leo...@us...> - 2010-02-10 17:32:30
|
Revision: 2220 http://aperture.svn.sourceforge.net/aperture/?rev=2220&view=rev Author: leo_sauermann Date: 2010-02-10 17:32:07 +0000 (Wed, 10 Feb 2010) Log Message: ----------- Aperture-Server: added base authentification for administration tasks. This is enough security for now. Added XML-RPC server using apache XML-RPC, must still add the actual service object. Modified Paths: -------------- aperture-webserver/trunk/README.txt aperture-webserver/trunk/WebContent/WEB-INF/web.xml aperture-webserver/trunk/WebContent/config/index.jsp aperture-webserver/trunk/WebContent/config/newsource.jsp aperture-webserver/trunk/WebContent/config/sourcedetails.jsp aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/AccessResourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/CrawlControlServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ModifySourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RefreshResourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RemoveSourceServlet.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java Added Paths: ----------- aperture-webserver/trunk/WebContent/WEB-INF/apertureserver.properties aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-server-3.1.2.jar aperture-webserver/trunk/src/org/apache/ aperture-webserver/trunk/src/org/apache/xmlrpc/ aperture-webserver/trunk/src/org/apache/xmlrpc/webserver/ aperture-webserver/trunk/src/org/apache/xmlrpc/webserver/XmlRpcServlet.properties aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/XmlRpcServletAuthenticated.java Removed Paths: ------------- 
aperture-webserver/trunk/WebContent/config/nowrapper.jsp aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/OpenResourceServlet.java Modified: aperture-webserver/trunk/README.txt =================================================================== --- aperture-webserver/trunk/README.txt 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/README.txt 2010-02-10 17:32:07 UTC (rev 2220) @@ -21,6 +21,19 @@ - Datasource configuration - Crawler +Installation +============ +The server is designed to run stand-alone. It can be configured using and XML-RPC interface, +and using a web-interface. In the future, more configuration options (files, etc) may come handy. + +To configure the server: +* edit WEB-INF/apertureserver.properties. Change the admin password there. +* use the web interface http://www.example.com/aperture-webserver/config/index.jsp (replace with your version) + +Once you configured it, it will have created a configuration file in the WEB-INF/config directory. +You can safely copy that file to more servers if you want, but be warned: AccessData will cause +problems when used on multiple servers. 
+ Development =========== - checkout aperture-webserver using Eclipse >= 3.2, you need the J2EE Web Developer Tools (WST) Added: aperture-webserver/trunk/WebContent/WEB-INF/apertureserver.properties =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/apertureserver.properties (rev 0) +++ aperture-webserver/trunk/WebContent/WEB-INF/apertureserver.properties 2010-02-10 17:32:07 UTC (rev 2220) @@ -0,0 +1,2 @@ +administrator.username=admin +administrator.password=change_me \ No newline at end of file Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-server-3.1.2.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/xmlrpc-server-3.1.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: aperture-webserver/trunk/WebContent/WEB-INF/web.xml =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-02-10 17:32:07 UTC (rev 2220) @@ -20,6 +20,13 @@ <servlet-name>ApertureServerServlet</servlet-name> <servlet-class> org.semanticdesktop.aperture.servlet.ApertureServerServlet</servlet-class> + <init-param> + <param-name>configfile</param-name> + <param-value>WEB-INF/apertureserver.properties</param-value> + <description> + The properties file with the basic aperture server configuration. 
+ </description> + </init-param> <load-on-startup>1</load-on-startup> </servlet> <servlet> @@ -53,15 +60,6 @@ <description> </description> <display-name> - OpenResourceServlet</display-name> - <servlet-name>OpenResourceServlet</servlet-name> - <servlet-class> - org.semanticdesktop.aperture.server.configui.servlet.OpenResourceServlet</servlet-class> - </servlet> - <servlet> - <description> - </description> - <display-name> AccessResourceServlet</display-name> <servlet-name>AccessResourceServlet</servlet-name> <servlet-class> @@ -76,6 +74,19 @@ <servlet-class> org.semanticdesktop.aperture.server.configui.servlet.RefreshResourceServlet</servlet-class> </servlet> + <servlet> + <servlet-name>XmlRpcServlet</servlet-name> + <servlet-class>org.semanticdesktop.aperture.servlet.XmlRpcServletAuthenticated</servlet-class> + <init-param> + <param-name>enabledForExtensions</param-name> + <param-value>false</param-value> + <description>Sets, whether the servlet supports vendor extensions for XML-RPC.</description> + </init-param> + </servlet> + <servlet-mapping> + <servlet-name>XmlRpcServlet</servlet-name> + <url-pattern>/xmlrpc</url-pattern> + </servlet-mapping> <servlet-mapping> <servlet-name>FileInspector</servlet-name> <url-pattern>/FileInspector</url-pattern> @@ -97,10 +108,6 @@ <url-pattern>/config/deleteSource</url-pattern> </servlet-mapping> <servlet-mapping> - <servlet-name>OpenResourceServlet</servlet-name> - <url-pattern>/config/openResource</url-pattern> - </servlet-mapping> - <servlet-mapping> <servlet-name>AccessResourceServlet</servlet-name> <url-pattern>/config/accessResource</url-pattern> </servlet-mapping> Modified: aperture-webserver/trunk/WebContent/config/index.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/index.jsp 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/WebContent/config/index.jsp 2010-02-10 17:32:07 UTC (rev 2220) @@ -8,8 +8,12 @@ <%@page 
import="java.util.LinkedList"%> <%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> <%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceClassBean"%> - +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> <% + // Authenticate + if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; + DataSourceInstanceDAO dao = DataSourceDAOFactory.getInstanceDao(); if (dao == null) { RequestDispatcher rd = request.getRequestDispatcher("/nowrapper.jsp"); @@ -26,6 +30,7 @@ request.setAttribute("sourceTypes", sourcesList); %> + <html> <HEAD> <TITLE>Aperture data wrapper configuration.</TITLE> Modified: aperture-webserver/trunk/WebContent/config/newsource.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/newsource.jsp 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/WebContent/config/newsource.jsp 2010-02-10 17:32:07 UTC (rev 2220) @@ -1,9 +1,12 @@ <%@page import="java.util.logging.Logger"%> <%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> <%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> +<% + // Authenticate + if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; -<% - Logger log = Logger.getLogger(this.getClass().getName()); // Get the type to instantiate String type = request.getParameter("type"); Deleted: aperture-webserver/trunk/WebContent/config/nowrapper.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/nowrapper.jsp 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/WebContent/config/nowrapper.jsp 2010-02-10 17:32:07 UTC (rev 2220) @@ -1,23 +0,0 @@ -<%@ page language="java" 
contentType="text/html; charset=ISO-8859-1" - pageEncoding="ISO-8859-1"%> -<%@ page import="java.io.PrintWriter" %> -<%@ page import="java.io.StringWriter" %> -<%@ page import="org.ontoware.rdf2go.model.Model" %> - -<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> -<html> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> -<title>No active dat wrapper has been detected.</title> -</head> -<body> -<center> - No active data wrapper has been detected. - Refresh this page when the data wrapper is activated. -</center> -</body> -</html> - -<% - session.removeAttribute("error"); -%> \ No newline at end of file Modified: aperture-webserver/trunk/WebContent/config/sourcedetails.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-02-10 17:32:07 UTC (rev 2220) @@ -5,9 +5,12 @@ <%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> <%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> <%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> +<% + // Authenticate + if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; - -<% String uriString = (String)request.getParameter("uri"); if (uriString == null) { response.sendRedirect("index.jsp"); Added: aperture-webserver/trunk/src/org/apache/xmlrpc/webserver/XmlRpcServlet.properties =================================================================== Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java =================================================================== --- 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -34,6 +34,8 @@ import java.util.List; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.datasource.DataSourceFactory; import org.semanticdesktop.aperture.detector.DataSourceDescription; import org.semanticdesktop.aperture.vocabulary.NIE; @@ -105,6 +107,31 @@ */ public static final int DEFAULT_MONITORING_THREAD_POOL_SIZE = 2; + /** + * An URI prefix that is before all configuration URIs used inside the server + */ + public static final String APERTURESERVER_URIPREFIX = "urn:aperture:server:"; + + /** + * The URI identifying the server resource in the dynamic configuration RDF model. + */ + public static final URI APERTURESERVER_CONFIGURI = new URIImpl("urn:aperture:server"); + + /** + * The URI identifying the named graph context in the modelset which is used to store the main server + */ + public static final URI APERTURESERVER_CONFIGCONTEXTURI = new URIImpl(APERTURESERVER_URIPREFIX+"configurationcontext"); + + /** + * Configuration URI for the crawler handler in the configuration RDF model. + */ + public static final URI CRAWLERHANDLER_CONFIGURI = new URIImpl(APERTURESERVER_URIPREFIX+"crawlerhandler"); + + + public static final String DATASOURCE_URIPREFIX = APERTURESERVER_URIPREFIX+"datasource:"; + + + //////////////////////////////////////////////////////////////////////// ////////////////////////////// DESTRUCTION ///////////////////////////// //////////////////////////////////////////////////////////////////////// @@ -549,4 +576,32 @@ */ public void removeStateListener(ApertureServerStateListener listener) ; + /** + * Read the current configuration of the crawler handler. 
+ * The result is returned as RDF, in the serialization format chosen in the mimetype parameter. + * The configuration is stored in a separate named graph, identified as {@link #CRAWLERHANDLER_CONFIGURI}. + * @param mimeType the mimetype to use for serialization + * @return the configuration as string. The URI is {@link #CRAWLERHANDLER_CONFIGURI} ({@value #CRAWLERHANDLER_CONFIGURI}) + */ + public String getCrawlerHandlerConfig(String mimeType); + + /** + * Set the new Crawler handler config as RDF. + * @param newRDFConfiguration new configuration of the crawler handler. + * you must use the {@link #CRAWLERHANDLER_CONFIGURI} ({@value #CRAWLERHANDLER_CONFIGURI}) as subject of the + * statements. + * @param mimeType the mime type of the serialization. See the documentation + * for {@link RDFRepository} interface for possible values. + */ + public void setCrawlerHandlerConfig(String newRDFConfiguration, String mimeType); + + + /** + * For all server administration tasks, servlets should use this username/password combination + * to authenticate the user + * @param username plaintext (must not contain special characters such as ":" + * @param password in plaintext + * @return true, if the user is an authenticated administrator + */ + public boolean authenticateAdministrator(String username, String password); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/AccessResourceServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/AccessResourceServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/AccessResourceServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -45,6 +45,7 @@ import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; +import 
org.semanticdesktop.aperture.servlet.ApertureServerServlet; /** * Servlet for opening resources. @@ -59,6 +60,10 @@ protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + // authenticate + if (!ApertureServerServlet.authenticateAdministrator(req, resp, getServletContext())) + return; + String uriEncoded = req.getParameter("uri"); String response = null; if (uriEncoded != null) { Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/CrawlControlServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/CrawlControlServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/CrawlControlServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -42,6 +42,7 @@ import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; +import org.semanticdesktop.aperture.servlet.ApertureServerServlet; /** * Servlet for controlling crawlers. Startring stopping etc. 
@@ -60,7 +61,7 @@ @Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - resp.sendRedirect("index.jsp"); + doPost(req, resp); } /** @@ -71,6 +72,10 @@ @Override protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + // authenticate + if (!ApertureServerServlet.authenticateAdministrator(req, resp, getServletContext())) + return; + String uriEncoded = req.getParameter("uri"); String action = req.getParameter("action"); if (uriEncoded != null && action != null) { Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ModifySourceServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ModifySourceServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/ModifySourceServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -48,6 +48,7 @@ import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; import org.semanticdesktop.aperture.server.configui.exception.ValidationFailedException; +import org.semanticdesktop.aperture.servlet.ApertureServerServlet; /** * Servlet for modifying the configuration of data sources. 
@@ -73,7 +74,7 @@ @Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - resp.sendRedirect("index.jsp"); + doPost(req, resp); } /** @@ -85,6 +86,10 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + // authenticate + if (!ApertureServerServlet.authenticateAdministrator(req, resp, getServletContext())) + return; + String typeUri = req.getParameter(TYPE_URI_ATTRIBUTE); String sourceUri = req.getParameter(SOURCE_URI_ATTRIBUTE); Deleted: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/OpenResourceServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/OpenResourceServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/OpenResourceServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -1,67 +0,0 @@ -/** - * Copyright (c) 2006-2009, NEPOMUK Consortium - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the NEPOMUK Consortium nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - **/ -package org.semanticdesktop.aperture.server.configui.servlet; - -import java.io.IOException; -import java.net.URLDecoder; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; -import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; - -/** - * Servlet for opening resources. 
- * @author <a href="mailto:ant...@df...">Antoni Mylka</a> - */ -public class OpenResourceServlet extends HttpServlet { - - /** the serial version UID */ - private static final long serialVersionUID = 5329327554120275250L; - - @Override - protected void service(HttpServletRequest req, HttpServletResponse resp) - throws ServletException, - IOException { - String uriEncoded = req.getParameter("uri"); - if (uriEncoded != null) { - String uri = URLDecoder.decode(uriEncoded, "UTF-8"); - DataSourceInstanceDAO dao = DataSourceDAOFactory.getInstanceDao(); - dao.openResource(uri); - } - resp.sendRedirect("index.jsp"); - } -} Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RefreshResourceServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RefreshResourceServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RefreshResourceServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -42,6 +42,7 @@ import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; +import org.semanticdesktop.aperture.servlet.ApertureServerServlet; /** * Servlet for opening resources. 
@@ -56,6 +57,10 @@ protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + // authenticate + if (!ApertureServerServlet.authenticateAdministrator(req, resp, getServletContext())) + return; + String uriEncoded = req.getParameter("uri"); if (uriEncoded != null) { try { Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RemoveSourceServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RemoveSourceServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/servlet/RemoveSourceServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -42,6 +42,7 @@ import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO; +import org.semanticdesktop.aperture.servlet.ApertureServerServlet; /** * Servlet for removing data sources. 
@@ -61,7 +62,7 @@ @Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - resp.sendRedirect("index.jsp"); + doPost(req, resp); } @@ -69,6 +70,11 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + // authenticate + if (!ApertureServerServlet.authenticateAdministrator(req, resp, getServletContext())) + return; + + String uriEncoded = req.getParameter("uri"); if (uriEncoded != null) { String uri = URLDecoder.decode(uriEncoded, "UTF-8"); Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -42,7 +42,9 @@ import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import java.util.Properties; import java.util.Set; +import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -103,6 +105,53 @@ * */ public class ServerImpl implements ApertureServer, ServerMXBean { + + public static final String CFG_ADMINISTRATOR_USERNAME = "administrator.username"; + public static final String CFG_ADMINISTRATOR_PASSWORD = "administrator.password"; + + /** + * Configuration parameters passed to the server. + * They are partly read from a properties file. + * @author sauermann + * + */ + public static class ServerConfigParameters { + /** + * @param configurationDirectory The directory that will store the temporary + * files generated by this server. 
+ */ + File configurationDirectory; + /** + * plaintext (must not contain special characters such as ":" + */ + String administratorUsername; + /** + * plaintext + */ + String administratorPassword; + + Properties properties; + + File propertiesFile; + + public ServerConfigParameters(File configurationDirectory, File propertiesFile) throws IOException { + this.configurationDirectory = configurationDirectory; + + properties = new Properties(); + FileInputStream fin = new FileInputStream(propertiesFile); + try { + properties.load(fin); + } finally { + fin.close(); + } + this.propertiesFile = propertiesFile; + this.administratorUsername = properties.getProperty(CFG_ADMINISTRATOR_USERNAME); + this.administratorPassword = properties.getProperty(CFG_ADMINISTRATOR_PASSWORD); + } + + + } + //////////////////////////////////////////////////////////////////////// //////////////////////////// STATIC CONSTANTS ////////////////////////// @@ -138,20 +187,10 @@ public static Logger log = Logger.getLogger(ServerImpl.class.getName()); - /** The main uri of the configuration of this server */ - private URI mainURI; - - /** - * The context in the configuration repository where the configuration of - * the server is stored. - */ - private URI configurationContextURI; - /** - * Prefix used for data source uri generation, computed from the mainURI. - * See {@link #computeDataSourceURIPrefix(URI)} + * The server configuration parameters (fixed) */ - private String dataSourceURIPrefix; + final private ServerConfigParameters configParameters; /** * The ModelSet that stores the configuration data. @@ -220,38 +259,32 @@ /** * Initializes the Aperture Server. * - * @param nepomukRepository The RDFRepository this server is supposed to - * interact with. The server will read the initial configuration - * from this repository and any changes to the configuration - * (addition/modification/deletion of data sources) will be reflected - * in this repository. 
This repository is also used to store the - * information extracted from the data sources. - * @param mainURIString The main URI of the configuration information. Every - * data source configured for this server is is attached to this - * central URI - * @param configurationContextString the context within the configuration - * model set where the configuration of this server is stored * @param registries The object that encapsulates all aperture registries. * The interface for the aperture framework. - * @param configurationDirectory The directory that will store the temporary - * files generated by this server. * @param factory an RDF2Go ModelFactory instance used to create temporary * models. * @throws ApertureServerException If the initialization failed. */ public ServerImpl( - String mainURIString, - String configurationContextString, + ServerConfigParameters parameters, ApertureRegistriesAggregate registries, - File configurationDirectory, + ModelFactory factory) throws ApertureServerException { log.entering(ServerImpl.class.getName(),"init"); + this.configParameters = parameters; + + // check default password + if ("change_me".equals(configParameters.administratorPassword)) + log.severe("Your ApertureServer is using the default password 'change_me'. " + + "This is a security threat. Malicious users can spy on your configured datasources and your passwords. 
" + + "Set this in file "+configParameters.propertiesFile); + // Initialize the configuration model // using a file-backed config modelset { - configurationFile = new File(configurationDirectory, "dsconfig.trig"); + configurationFile = new File(parameters.configurationDirectory, "dsconfig.trig"); configurationModelSet = factory.createModelSet(); configurationModelSet.open(); if (configurationFile.exists()) { @@ -269,20 +302,8 @@ } } - // convert the uri string into an URI instance - try { - this.mainURI = configurationModelSet.createURI(mainURIString); - this.configurationContextURI = configurationModelSet.createURI(configurationContextString); - this.dataSourceURIPrefix = computeDataSourceURIPrefix(mainURI); - } catch (ModelRuntimeException me) { - log.log(Level.SEVERE, - "The root URI or the configuration context URI for the " + - "server configuration is faulty",me); - throw new ApertureServerException(me); - } - // initialize the configuration model - configurationModel = configurationModelSet.getModel(configurationContextURI); + configurationModel = configurationModelSet.getModel(APERTURESERVER_CONFIGCONTEXTURI); // this model is closed in close() configurationModel.open(); @@ -297,7 +318,7 @@ // initialize the data source collection try { dataSourcePool = DataSourcePool - .initializeFromDirectory(configurationDirectory,this); + .initializeFromDirectory(configParameters.configurationDirectory,this); } catch (IOException ioe){ log.log(Level.SEVERE, "Information about the data sources couldn't be read",ioe); @@ -351,21 +372,6 @@ return DEFAULT_MONITORING_THREAD_POOL_SIZE; } - /** - * Computes the data source uri prefix from the main uri. 
- * - * @param uri - * @return the string with the data source uri prefix - */ - private String computeDataSourceURIPrefix(URI uri) { - String result = uri.toString(); - // an uri can only contain one hash - int index = result.lastIndexOf('#'); - if (index != -1) { - return result.replaceAll("#", "."); - } - return result; - } /** * Retrieves the data source configurations from the configuration @@ -458,7 +464,7 @@ // and here we are sure that the string is ok, and nothing can happen configurationModel.addAll(tempModel.iterator()); - configurationModel.addStatement(mainURI, DATA_SOURCE_CONFIG_URI, uri); + configurationModel.addStatement(APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI, uri); saveConfigurationFile(); synchronizeDataSourcePool(dataSourcePool); @@ -510,7 +516,7 @@ // remove removeConfigurationStatements(datasourceURI); // add - configurationModel.addStatement(mainURI, DATA_SOURCE_CONFIG_URI, datasourceURI); + configurationModel.addStatement(APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI, datasourceURI); // and here we are sure that the string is ok, and nothing can happen configurationModel.addAll(tempModel.iterator()); @@ -639,9 +645,9 @@ // ugly hack, (23.06.2007) the configuration model threw an // exception if I tried to remove a non-existent statement - if (configurationModel.contains(mainURI, DATA_SOURCE_CONFIG_URI, datasourceURI)) { + if (configurationModel.contains(APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI, datasourceURI)) { configurationModel.removeStatement( - mainURI, + APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI, datasourceURI); } @@ -695,7 +701,7 @@ boolean result = false; try { - result = configurationModel.contains(mainURI, + result = configurationModel.contains(APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI, uri); } catch (ModelRuntimeException me) { log.log(Level.SEVERE,"Couldn't determine if a data source is " + @@ -709,12 +715,10 @@ */ public String generateDataSourceUri() { // previously: using a counter proved to 
be faulty when crashed... + // and if we use a counter, it will mix with old crawled information, which is also bad. + // hence: a UUID. String result = null; - int count = 0; - do { - result = dataSourceURIPrefix + "/" + count; - count++; - } while (isARegisteredDataSource(new URIImpl(result))); + result = DATASOURCE_URIPREFIX + UUID.randomUUID().toString(); return result; } @@ -724,7 +728,7 @@ */ private void synchronizeDataSourcePool(DataSourcePool pool) { pool.synchronizeDataSources( - getDataSourceConfigurations(configurationModel, mainURI), + getDataSourceConfigurations(configurationModel, APERTURESERVER_CONFIGURI), crawlingThreadPool); } @@ -741,7 +745,7 @@ try { iterator = configurationModel.findStatements( - mainURI,DATA_SOURCE_CONFIG_URI,Variable.ANY); + APERTURESERVER_CONFIGURI, DATA_SOURCE_CONFIG_URI,Variable.ANY); while (iterator.hasNext()) { Statement statement = iterator.next(); Node node = statement.getObject(); @@ -771,10 +775,10 @@ String query = "SELECT ?dataUri " + "WHERE {" + - " <" + mainURI.toString() + "> " + - " <" + DATA_SOURCE_CONFIG_URI.toString() +"> " + + " " + APERTURESERVER_CONFIGURI.toSPARQL() + " " + + " " + DATA_SOURCE_CONFIG_URI.toSPARQL() +" " + " ?dataUri ." 
+ - " ?dataUri <" + RDF.type.toString() + "> " + + " ?dataUri " + RDF.type.toSPARQL() + " " + " <" + typeURI + ">" + "}"; @@ -929,7 +933,7 @@ try { TriplePattern pattern = configurationModel.createTriplePattern( - mainURI,DATA_SOURCE_CONFIG_URI,Variable.ANY); + APERTURESERVER_CONFIGURI,DATA_SOURCE_CONFIG_URI,Variable.ANY); result = configurationModel.countStatements(pattern); } catch (ModelRuntimeException me) { log.log(Level.WARNING,"Couldn't count the data sources",me); @@ -1371,6 +1375,21 @@ super.finalize(); } + public boolean authenticateAdministrator(String username, String password) { + return configParameters.administratorUsername.equals(username) && configParameters.administratorPassword.equals(password); + } + + public String getCrawlerHandlerConfig(String mimeType) { + // TODO Auto-generated method stub + return null; + } + + public void setCrawlerHandlerConfig(String newRDFConfiguration, + String mimeType) { + // TODO Auto-generated method stub + + } + } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java 2010-02-09 19:51:10 UTC (rev 2219) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -1,13 +1,16 @@ package org.semanticdesktop.aperture.servlet; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.lang.management.ManagementFactory; +import java.util.Properties; +import java.util.StringTokenizer; import javax.management.MBeanServer; -import javax.management.MalformedObjectNameException; import javax.management.ObjectName; import javax.servlet.ServletConfig; +import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import 
javax.servlet.http.HttpServletResponse; @@ -21,11 +24,13 @@ import org.semanticdesktop.aperture.mime.identifier.impl.DefaultMimeTypeIdentifierRegistry; import org.semanticdesktop.aperture.opener.impl.DefaultDataOpenerRegistry; import org.semanticdesktop.aperture.runtime.ApertureRuntime; -import org.semanticdesktop.aperture.server.ApertureServerException; +import org.semanticdesktop.aperture.server.ApertureServer; import org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory; import org.semanticdesktop.aperture.server.impl.ApertureRegistriesAggregate; import org.semanticdesktop.aperture.server.impl.ServerImpl; +import sun.misc.BASE64Decoder; + /** * Servlet implementation class for Servlet: ApertureServerServlet * @@ -33,9 +38,31 @@ public class ApertureServerServlet extends javax.servlet.http.HttpServlet implements javax.servlet.Servlet { static final long serialVersionUID = 1L; + + /** + * The attribute used to read/write the ApertureServer instance in the ServletContext + */ + public static final String APERTURESERVER_ATTR = "apertureserver"; ServerImpl server; + /** + * Read the ApertureServer from the servlet container context. + * This is valid for all JSPs and Servlets in this application. + * @param context the ServletContext + * @return the ApertureServer. 
+ * @throws NullPointerException if the server was not initialized yet + */ + public static ApertureServer getApertureServer(ServletContext context) { + Object result = context.getAttribute(APERTURESERVER_ATTR); + if (result == null) + throw new NullPointerException("cannot read "+APERTURESERVER_ATTR+" from ServletContext"); + if (!(result instanceof ApertureServer)) + throw new ClassCastException("Attribute "+APERTURESERVER_ATTR+" from ServletContext is not a "+ + ApertureServer.class.getSimpleName()); + return (ApertureServer)result; + } + /* * (non-Java-doc) * @@ -76,6 +103,33 @@ public void init(ServletConfig config) throws ServletException { super.init(config); try { + // read the config file and find the folder + ServerImpl.ServerConfigParameters parameters = null; + { + // config properties + + String configfilename = config.getInitParameter("configfile"); + if (configfilename == null) { + configfilename = "WEB-INF/apertureserver.properties"; + getServletContext().log("configfile property missing for ApertureServerServlet. 
Using "+configfilename+" as fallback"); + } + File configfile = new File(configfilename); + if (!configfile.isAbsolute()) + configfile = new File(getServletContext().getRealPath(configfilename)); + if (!configfile.exists()) + throw new Exception("Cannot read config file from "+configfile); + + // config folder + File configfolder = new File( + getServletContext().getRealPath("WEB-INF/config")); + if (!configfolder.exists()) + if (!configfolder.mkdirs()) + throw new Exception("Cannot create config folder: "+configfolder); + getServletContext().log("using configuration folder "+configfolder); + + parameters = new ServerImpl.ServerConfigParameters(configfolder, configfile); + } + ApertureRuntime runtime = new ApertureRuntime(); ApertureRegistriesAggregate registries = new ApertureRegistriesAggregate(); registries.setCrawlerRegistry(new DefaultCrawlerRegistry()); @@ -88,14 +142,8 @@ registries.setMimeTypeIdentifierRegistry(new DefaultMimeTypeIdentifierRegistry()); //registries.setTrustDeciderRegistry(??) //registries.setTrustManagerRegistry(??) - File configfolder = new File( - getServletContext().getRealPath("WEB-INF/config")); - if (!configfolder.exists()) - if (!configfolder.mkdirs()) - throw new Exception("Cannot create config folder: "+configfolder); - getServletContext().log("using configuration folder "+configfolder); - server = new ServerImpl("uri:aperture:server", "uri:aperture:server:configurationcontext", - registries, configfolder, RDF2Go + + server = new ServerImpl(parameters, registries, RDF2Go .getModelFactory()); } catch (Exception e) { @@ -103,6 +151,8 @@ e); throw new ServletException(e); } + + // register MBeans MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); ObjectName name; try { @@ -113,10 +163,97 @@ e); throw new ServletException(e); } + + // register with Servlet Container + getServletContext().setAttribute(APERTURESERVER_ATTR, server); + // Share global variables statically with JSPs. 
this is a hack done ages ago for OSGI, // it can safely be replaced by using Servlet variables. + // TODO: can be deprecated in favor of #getApertureServer DataSourceDAOFactory.setApertureServer(server); + } + + /** + * Take the passed request and response and authenticate if the user is logged in using + * the administrator username and password. If the user is <b>not authenticated</b>, + * the response <b>will be rewritten</b> to an authentification challenge and false will + * be returned. You should then <b>not</b> modify the response anymore. You should + * also not modify the response before calling this method. + * If the user is authenticated, true will be returned. + * <i>inspired by <a href="http://www.alemoi.com/dev/httpaccess/">alemoi HTTP access code snippet</a></i> + * @param request the request to authenticate + * @param response the response. If not authenticated, it will be modified. Otherwise, it is not used. + * @param servletContext the servlet context + * @return true, if the user is authenticated and administration access can proceed. + * False otherwise, then the response must be left untouched. + */ + public static boolean authenticateAdministrator(HttpServletRequest request, + HttpServletResponse response, ServletContext servletContext) { + String userID = null; + String password = null; + // Assume not valid until proven otherwise + + boolean valid = false; + + // Get the Authorization header, if one was supplied + + String authHeader = request.getHeader("Authorization"); + if (authHeader != null) { + StringTokenizer st = new StringTokenizer(authHeader); + if (st.hasMoreTokens()) { + String basic = st.nextToken(); + + // We only handle HTTP Basic authentication + if (basic.equalsIgnoreCase("Basic")) { + String credentials = st.nextToken(); + + // This example uses sun.misc.* classes. + // You will need to provide your own + // if you are not comfortable with that. 
+ + BASE64Decoder decoder = new BASE64Decoder(); + try { + String userPass = new String(decoder + .decodeBuffer(credentials)); + // The decoded string is in the form + // "userID:password". + int p = userPass.indexOf(":"); + if (p != -1) { + userID = userPass.substring(0, p); + password = userPass.substring(p + 1); + // Validate user ID and password + // and set valid true true if valid. + // In this example, we simply check + // that neither field is blank + valid = ApertureServerServlet.getApertureServer(servletContext) + .authenticateAdministrator(userID, password); + } + } catch (IOException e) { + servletContext.log("cannot authenticate request: "+e, e); + } + } + } + } + + // If the user was not validated, fail with a + // 401 status code (UNAUTHORIZED) and + // pass back a WWW-Authenticate header for + // this servlet. + // + // Note that this is the normal situation the + // first time you access the page. The client + // web browser will prompt for userID and password + // and cache them so that it doesn't have to + // prompt you again. 
+ + if (!valid) { + String s = "Basic realm=\"Aperture Server\""; + response.setHeader("WWW-Authenticate", s); + response.setStatus(401); + } + return valid; } + } \ No newline at end of file Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/XmlRpcServletAuthenticated.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/XmlRpcServletAuthenticated.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/XmlRpcServletAuthenticated.java 2010-02-10 17:32:07 UTC (rev 2220) @@ -0,0 +1,49 @@ +/** + * + */ +package org.semanticdesktop.aperture.servlet; + +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.XmlRpcRequest; +import org.apache.xmlrpc.common.XmlRpcHttpRequestConfig; +import org.apache.xmlrpc.metadata.XmlRpcSystemImpl; +import org.apache.xmlrpc.server.AbstractReflectiveHandlerMapping; +import org.apache.xmlrpc.server.PropertyHandlerMapping; +import org.apache.xmlrpc.server.XmlRpcHandlerMapping; +import org.apache.xmlrpc.webserver.XmlRpcServlet; + +/** + * Authentification in XML-RPC. + * according to the XML-RPC documentation. 
+ * @author sauermann + * + */ +public class XmlRpcServletAuthenticated extends XmlRpcServlet { + + private boolean isAuthenticated(String pUserName, String pPassword) { + return ApertureServerServlet.getApertureServer(getServletContext()).authenticateAdministrator(pUserName, pPassword); + } + + protected XmlRpcHandlerMapping newXmlRpcHandlerMapping() throws XmlRpcException { + PropertyHandlerMapping mapping + = (PropertyHandlerMapping) super.newXmlRpcHandlerMapping(); + + // authentification + AbstractReflectiveHandlerMapping.AuthenticationHandler handler = + new AbstractReflectiveHandlerMapping.AuthenticationHandler(){ + public boolean isAuthorized(XmlRpcRequest pRequest){ + XmlRpcHttpRequestConfig config = + (XmlRpcHttpRequestConfig) pRequest.getConfig(); + return isAuthenticated(config.getBasicUserName(), + config.getBasicPassword()); + }; + }; + mapping.setAuthenticationHandler(handler); + + // introspection + XmlRpcSystemImpl.addSystemHandler(mapping); + + return mapping; + } + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/XmlRpcServletAuthenticated.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-02-16 19:07:33
|
Revision: 2221 http://aperture.svn.sourceforge.net/aperture/?rev=2221&view=rev Author: leo_sauermann Date: 2010-02-16 19:07:24 +0000 (Tue, 16 Feb 2010) Log Message: ----------- aperture-webserver: trying an XML log of the datasources. XML sucks. Modified Paths: -------------- aperture-webserver/trunk/.classpath Added Paths: ----------- aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1-license.txt aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1.jar aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java aperture-webserver/trunk/src.test/ aperture-webserver/trunk/src.test/org/ aperture-webserver/trunk/src.test/org/semanticdesktop/ aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/ aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/ aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java Modified: aperture-webserver/trunk/.classpath =================================================================== --- aperture-webserver/trunk/.classpath 2010-02-10 17:32:07 UTC (rev 2220) +++ aperture-webserver/trunk/.classpath 2010-02-16 19:07:24 UTC (rev 2221) @@ -1,8 +1,10 @@ <?xml version="1.0" encoding="UTF-8"?> <classpath> <classpathentry kind="src" path="src"/> + <classpathentry kind="src" path="src.test"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v5.5"/> <classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.web.container"/> + <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3"/> <classpathentry kind="output" path="build/classes"/> </classpath> Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1-license.txt 
=================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1-license.txt (rev 0) +++ aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1-license.txt 2010-02-16 19:07:24 UTC (rev 2221) @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1-license.txt ___________________________________________________________________ Added: svn:eol-style + native Added: aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1.jar =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/WebContent/WEB-INF/lib/commons-collections-3.2.1.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp 2010-02-16 19:07:24 UTC (rev 2221) @@ -0,0 +1,111 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> +<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> +<%@page import="java.net.URLDecoder"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> +<%@page import="org.semanticdesktop.aperture.server.ApertureServer"%> +<%@page import="org.semanticdesktop.aperture.server.CrawlerStateBean"%> +<%@page import="org.semanticdesktop.aperture.server.DataSourceLog"%> + +<% +/** + * render the detail statistics of a datasource + * @author leo sauermann + */ + +// Authenticate +if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; + 
+// Get URI of datasource +String uriString = (String)request.getParameter("uri"); +if (uriString == null) { + throw new ServletException("parameter 'uri' identifying datasource missing"); +} +String uriDecoded = URLDecoder.decode(uriString,"UTF-8"); + +ApertureServer server = ApertureServerServlet.getApertureServer(getServletContext()); +CrawlerStateBean crawlerstate = server.getDetailedCrawlerState(uriDecoded); +if (crawlerstate != null) + request.setAttribute("crawlerstate", crawlerstate); +DataSourceLog dslog = server.getDataSourceLog(uriDecoded); +%> + + +<html> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> + <title>Details for ${bean.name}</title> + <link title="default" rel="stylesheet" type="text/css" + href="style.css" media="screen" /> +</head> +<body> +<% if (bean == null) { %> + <center> + <h1>Data source not found</h1> + <h2><a href="index.jsp">Back to the sources list</a></h2> + </center> +<% } else { %> + <h1>Details of '${bean.name}' data source</h1> + + <h2>Basic data source information</h2> + <%= fresnelEditor.createCommonFormPart() %> + <TR> + <TD class="rowheader">Status:</TD> + <aperture:statuscell state="${bean.crawlingState}"/> + </TR> + </table> + + <h2>Detailed data source configuration</h2> + + <%= fresnelEditor.createSpecificFormPart() %> + + <h2><aperture:crawlbutton bean="${bean}"/></h2> + + <%if (bean.getLastErrorMessage() != null) {%> + <h2>Error message</h2> + <table class="sourcedetails"> + <TR> + <TD class="errormsg"> + <pre>${bean.lastErrorMessage}</pre> + </TD> + </TR> + </table> + <%} %> + + <h2>Crawl report</h2> + <table > + <TR> + <TD class="rowheader">Crawl started:</TD> + <td class="configinput">${bean.crawlStartedString}</td> + </TR> + <TR> + <TD class="rowheader">Crawl stopped:</TD> + <td class="configinput">${bean.crawlStoppedString}</td> + </TR> + <TR> + <TD class="rowheader">New objects:</TD> + <td class="configinput">${bean.newObjects}</td> + </TR> + <TR> + <TD 
class="rowheader">Changed objects:</TD> + <td class="configinput">${bean.modifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Unchanged objects:</TD> + <td class="configinput">${bean.unmodifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Removed objects:</TD> + <td class="configinput">${bean.deletedObjects}</td> + </TR> + </table> + + <h2><a href="index.jsp">Back to the data sources list</a></h2> +<% bean.dispose(); + } %> +</body> +</html> Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java 2010-02-16 19:07:24 UTC (rev 2221) @@ -0,0 +1,336 @@ +/** + * + */ +package org.semanticdesktop.aperture.server; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.io.StringReader; +import java.util.Collection; +import java.util.LinkedList; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; +import java.util.logging.StreamHandler; +import java.util.logging.XMLFormatter; + +import org.apache.commons.collections.buffer.CircularFifoBuffer; +import org.xml.sax.Attributes; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.helpers.XMLReaderFactory; + +/** + * Log Messages of a DataSource. + * + * This class is used both to log messages (get the logger) + * and also to read the log messages. + * + * To see if warnings or errors were reported. 
+ * + * The question is: how to store and read logging messages. + * What do we have? + * We use http://www.slf4j.org/index.html for logging. That does only wrap another log framework. + * Commons-logging does also only wrap: http://commons.apache.org/logging/commons-logging-1.1.1/index.html + * log4j 1.2 does not parse logfiles http://logging.apache.org/log4j/1.2/index.html + * JUL does not parse neither. + * + * + * So the solution for this problem, using off-the-shelf software, is to use the JUL logger + * and the XMLFormatter, as this log format is at least documented and parsable. + * I have to write the parser myself, though. + * + * Note: I tried to expose only a slf4j logger, and use the JUL Logger only internally, + * but the bastards from SLF4J made the constructor new JDK14LoggerAdapter(loggerJUL) private, + * so this "oh so standardized and properly programmed" library cannot be used to wrap a JUL logger. + * For what do we use slf4j anyway? + * + * @author sauermann + */ +public class DataSourceLog { + + /** + * What should be logged into the file? Everything above CONFIG + */ + static final Level TRESHOLD = Level.CONFIG; + + Logger loggerJUL; + + /** + * String identifying the datasource to which this log belongs + */ + final String dataSourceUri; + + /** + * logfile that is to be used. 
+ */ + final File logfile; + + /** + * Output log formatter + */ + XMLFormatter xmlformatter; + /** + * output log file handler + */ + StreamHandler xmlfilehandler; + + /** + * A printstream to autoflush, because the xmlfilestream does not have this + */ + PrintStream xmlfilestreamflusher; + + /** + * the file to write to + */ + FileOutputStream xmlfilestream; + + /** + * A dummy entity resolver to get around the problem that every xml parsing + * involves dereferencing DTDs + */ + final static EntityResolver dummyEntityResolver = new EntityResolver() { + public InputSource resolveEntity(String publicID, String systemID) + throws SAXException { + return new InputSource(new StringReader("")); + } + }; + + /** + * Oh pots of gold, there is no parser for the "standard" xml format created by JUL. + * How standard is that? + * + * This class has a serious flaw: when the log file gets BIG, it has to load the whole log + * file into memory, which is a bit stupid. I am not going to do that better, if you want, + * go ahead. + * + * According to + * <a href="http://java.sun.com/j2se/1.4.2/docs/guide/util/logging/overview.html#3.0">DTD</a>, + * this is to do: + * date, millis, sequence, logger?, level, + * class?, method?, thread?, message, key?, catalog?, param*, exception? 
+ * + * @author sauermann + * + */ + private class LogParser extends DefaultHandler { + + + + /** + * As CircularFifoBuffer is not generic, this is also not generic + */ + @SuppressWarnings("unchecked") + final Collection buffer; + + LogRecord cur = null; + + StringBuffer curValue = null; + String curException = null; + + final Level minLevel; + + public LogParser(int length, Level minLevel) { + if (length>=0) + this.buffer = new CircularFifoBuffer(length); + else + this.buffer = new LinkedList<Object>(); + this.minLevel = minLevel; + + } + + @SuppressWarnings("unchecked") + public Collection<LogRecord> parse(File file) throws Exception { + /* + SAXParserFactory spf = SAXParserFactory.newInstance(); + spf.setValidating(false); + spf.setXIncludeAware(false); + SAXParser parser = spf.newSAXParser(); + parser.getXMLReader().setEntityResolver(dummyEntityResolver); + parser.getXMLReader().setContentHandler(this); + //reader.setErrorHandler(new MyErrorHandler()); + parser.parse(file, this); + */ + XMLReader reader = XMLReaderFactory.createXMLReader(); + reader.setErrorHandler(new ErrorHandler() { + + public void error(SAXParseException exception) + throws SAXException { + // TODO Auto-generated method stub + + } + + public void fatalError(SAXParseException exception) + throws SAXException { + // TODO Auto-generated method stub + + } + + public void warning(SAXParseException exception) + throws SAXException { + // TODO Auto-generated method stub + + } + + }); + reader.setEntityResolver(dummyEntityResolver); + reader.setContentHandler(this); + FileInputStream fin = new FileInputStream(file); + try { + reader.parse(new InputSource(fin)); + } finally { + fin.close(); + } + + return (Collection<LogRecord>)buffer; + } + + @Override + public void endElement(String uri, String localName, String name) + throws SAXException { + if ("record".equals(localName)) { + // add the current element to the buffer + if (cur == null) + throw new SAXException("closing 'record' was missing an 
opening 'record'"); + // only return those above the required log level + if (cur.getLevel().intValue() >= minLevel.intValue()) + buffer.add(cur); + cur = null; + curValue = null; + curException = null; + } else { + if (curValue == null) // empty + return; + if (cur == null) + throw new SAXException("handling close of '"+localName+"': missing open element 'record'"); + + // trim curValue, this is always a problem with newlines + String val = curValue.toString().trim(); + + // "date" - not parse it, its not in the logrecord + // "sequence" - not + // class?, method?, thread? - not + // key?, catalog?, param* - not + if ("millis".equals(localName)) { + long millis = Long.parseLong(val); + cur.setMillis(millis); + } else if ("logger".equals(localName)) { + cur.setLoggerName(val); + } else if ("level".equals(localName)) { + cur.setLevel(Level.parse(val)); + } else if ("message".equals(localName)) { + // are we inside an exception? + if (curException != null) + curException = val; + else + cur.setMessage(val); + } else if ("exception".equals(localName)) + curException = null; + + // the curValue was read, free it + curValue = null; + } + + } + + @Override + public void characters(char[] ch, int start, int length) + throws SAXException { + if (curValue == null) + curValue = new StringBuffer(new String(ch, start, length)); + else + curValue.append(ch, start, length); + } + + /** + * Parse according to + */ + @Override + public void startElement(String uri, String localName, String name, + Attributes attributes) throws SAXException { + if ("record".equals(localName)) + cur = new LogRecord(Level.ALL, ""); + else if ("exception".equals(localName)) + curException = ""; + } + } + + /** + * Create a new DataSourceLog instance. It will create internally the logger instance + * which can be used to log messages. 
+ * @param dataSourceUri the data source uri whose log messages will be logged + * @param logfile the logfile to use + * @throws FileNotFoundException if the passed logfile cannot be created + */ + public DataSourceLog(String dataSourceUri, File logfile) throws FileNotFoundException { + this.dataSourceUri = dataSourceUri; + this.logfile = logfile; + // create the JUL Logger + loggerJUL = Logger.getLogger(getClass().getName()+"."+dataSourceUri); + // add handler + xmlformatter = new XMLFormatter(); + xmlfilestream = new FileOutputStream(logfile, true); + xmlfilestreamflusher = new PrintStream(xmlfilestream, true); + xmlfilehandler = new StreamHandler(xmlfilestreamflusher, xmlformatter) { + + /** + * yep, its ugly, but flushing from the outside of this would be even worse. + */ + @Override + public synchronized void publish(LogRecord record) { + super.publish(record); + flush(); + } + + }; + loggerJUL.addHandler(xmlfilehandler); + loggerJUL.setLevel(TRESHOLD); // to be safe, set on both + xmlfilehandler.setLevel(TRESHOLD); // to be safe, set on both + } + + + /** + * Read <b>length</b> log records that are equal or beyond log level "minlevel", + * both parameters are optional. + * @param length read that many records, but not more. if -1, then "read all" + * @param minLevel read beyond that level. if null, return all. + * @return a collection of n elements, n<=length. 
can be empty, but not null + */ + public Collection<LogRecord> read(int length, Level minLevel) throws Exception { + if (minLevel == null) + minLevel = Level.ALL; + LogParser parser = new LogParser(length, minLevel); + return parser.parse(logfile); + } + + /** + * GEt the JUL Logger + * @return the JUL Logger + */ + public Logger getLoggerJUL() { + return loggerJUL; + } + + + @Override + protected void finalize() throws Throwable { + if (xmlfilestream != null) { + try { + xmlfilestream.close(); + } catch (Exception x) { + // swallow + } + xmlfilestream = null; + } + super.finalize(); + } + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Added: aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java =================================================================== --- aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java (rev 0) +++ aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java 2010-02-16 19:07:24 UTC (rev 2221) @@ -0,0 +1,90 @@ +/** + * + */ +package org.semanticdesktop.aperture.server; + +import java.io.File; +import java.util.Collection; +import java.util.UUID; +import java.util.logging.Level; +import java.util.logging.LogRecord; + +import junit.framework.TestCase; + +/** + * @author sauermann + * + */ +public class DataSourceLogTest extends TestCase { + + File testfile; + + String datasourceUri = ApertureServer.DATASOURCE_URIPREFIX + + UUID.randomUUID().toString(); + + DataSourceLog log; + + /* (non-Javadoc) + * @see junit.framework.TestCase#setUp() + */ + protected void setUp() throws Exception { + super.setUp(); + testfile = File.createTempFile("ApertureServer.DataSourceLogTest", "log"); + log = new 
DataSourceLog(datasourceUri, testfile); + } + + + /* (non-Javadoc) + * @see junit.framework.TestCase#tearDown() + */ + protected void tearDown() throws Exception { + debug(); + //testfile.delete(); + super.tearDown(); + } + + /** + * Comment out if needed + */ + private void debug() { + System.out.println("test file is at: "+testfile); + } + + + public void testWrite() throws Exception { + doSomeLogging(); + assertTrue("testfile exists", testfile.exists()); + assertTrue("file is emtpy: size '"+testfile.length()+"'", testfile.length()>0); + } + + public void testReadWrite() throws Exception { + doSomeLogging(); + Collection<LogRecord> read = log.read(-1, Level.ALL); + // we assume 4 message: w, s, l, c + assertEquals(4, read.size()); + + // read 2 + read = log.read(2, Level.ALL); + assertEquals(2, read.size()); + // read 1, it must be the last message above treshold, this is CONFIG + read = log.read(1, Level.ALL); + assertEquals(1, read.size()); + assertEquals(Level.CONFIG, read.iterator().next().getLevel()); + } + + + /*** + * log some messages of different levels + */ + private void doSomeLogging() { + // just write some logging. + log.getLoggerJUL().log(Level.WARNING, "Help!", new Exception("Ha")); + log.getLoggerJUL().log(Level.SEVERE, "severe help!", new Exception("Ha")); + log.getLoggerJUL().log(Level.INFO, "fine help!"); + log.getLoggerJUL().log(Level.CONFIG, "config help!"); + log.getLoggerJUL().log(Level.FINE, "fine"); + log.getLoggerJUL().log(Level.FINER, "finer"); + log.getLoggerJUL().log(Level.FINEST, "finest"); + } + +} Property changes on: aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-02-16 22:26:05
|
Revision: 2222 http://aperture.svn.sourceforge.net/aperture/?rev=2222&view=rev Author: leo_sauermann Date: 2010-02-16 22:25:57 +0000 (Tue, 16 Feb 2010) Log Message: ----------- aperture-webserver: fixed logging of crawling messages, added more reporting, added statistics page, added page listing all crawled resources+dates+bytesize Modified Paths: -------------- aperture-webserver/trunk/.settings/org.eclipse.wst.common.component aperture-webserver/trunk/WebContent/config/index.jsp aperture-webserver/trunk/WebContent/config/sourcedetails.jsp aperture-webserver/trunk/WebContent/config/style.css aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceInstanceDAO.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourceInformation.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/exception/DataSourceNotFoundException.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/thread/CrawlingThread.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java aperture-webserver/trunk/src.test/org/semanticdesktop/aperture/server/DataSourceLogTest.java Added Paths: ----------- aperture-webserver/trunk/WebContent/config/datasourceaccess.jsp aperture-webserver/trunk/WebContent/config/datasourcereport.jsp 
Removed Paths: ------------- aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp Modified: aperture-webserver/trunk/.settings/org.eclipse.wst.common.component =================================================================== --- aperture-webserver/trunk/.settings/org.eclipse.wst.common.component 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/.settings/org.eclipse.wst.common.component 2010-02-16 22:25:57 UTC (rev 2222) @@ -3,6 +3,7 @@ <wb-module deploy-name="aperture-webserver"> <wb-resource deploy-path="/" source-path="/WebContent"/> <wb-resource deploy-path="/WEB-INF/classes" source-path="/src"/> + <wb-resource deploy-path="/WEB-INF/classes" source-path="/src.test"/> <property name="java-output-path" value="build/classes"/> <property name="context-root" value="aperture-webserver"/> </wb-module> Added: aperture-webserver/trunk/WebContent/config/datasourceaccess.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/datasourceaccess.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/datasourceaccess.jsp 2010-02-16 22:25:57 UTC (rev 2222) @@ -0,0 +1,99 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> +<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> +<%@page import="java.net.URLDecoder"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> +<%@page import="org.semanticdesktop.aperture.server.ApertureServer"%> +<%@page import="org.semanticdesktop.aperture.server.CrawlerStateBean"%> +<%@page 
import="org.semanticdesktop.aperture.server.DataSourceLog"%> +<%@page import="java.util.logging.Level"%> +<%@page import="java.util.Collection"%> +<%@page import="java.util.logging.LogRecord"%> +<%@page import="java.util.Date"%> +<%@page import="java.text.DateFormat"%> +<%@page import="java.text.SimpleDateFormat"%> +<% +/** + * render the dataaccess statistics of a datasource + * @author leo sauermann + */ + +// Authenticate +if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; + +// Get URI of datasource +String uriString = (String)request.getParameter("uri"); +if (uriString == null) { + throw new ServletException("parameter 'uri' identifying datasource missing"); +} +String uriDecoded = URLDecoder.decode(uriString,"UTF-8"); + +ApertureServer server = ApertureServerServlet.getApertureServer(getServletContext()); + +SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss"); + +DataSourceUIBean datasource = server.getDataSourceBean(uriDecoded); +if (datasource != null) + request.setAttribute("datasource", datasource); + +%> + +<%@page import="java.util.Set"%> +<html> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> + <title>Reports for ${datasource.name}</title> + <link title="default" rel="stylesheet" type="text/css" + href="style.css" media="screen" /> +</head> +<body> +<% if (datasource == null) { %> + <center> + <h1>Data source not found</h1> + <h2><a href="index.jsp">Back to the sources list</a></h2> + </center> +<% } else { %> + <h1>All resources of <a href="sourcedetails.jsp?uri=${datasource.dataSourceURI}">'${datasource.name}' data source</a></h1> + <table> + <tr><td>URI</td><td>last access</td><td>size</td></tr> + <% + Set ids = datasource.getAccessDataIDSet(); + for (Object id : ids) { + String idv = id.toString(); + request.setAttribute("accid", idv); + try { + request.setAttribute("accsize", datasource.getAccessDataByteSize(idv)); + } catch (Exception x) 
{ + // swallow + } + try { + long millis = datasource.getAccessDataLastAccess(idv); + if (millis != 0) + { + Date d = new Date(millis); + request.setAttribute("accdate", dateformat.format(d)); + } else + request.setAttribute("accdate", "0"); + } catch (Exception x) { + // swallow + } + + + %> + <tr><td>${accid}</td><td>${accdate}</td><td>${accsize}</td></tr> + <% + } + %> + </table> + </p> + + + <h2><a href="index.jsp">Back to the data sources list</a></h2> +<% datasource.dispose(); + } %> +</body> +</html> Copied: aperture-webserver/trunk/WebContent/config/datasourcereport.jsp (from rev 2221, aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp) =================================================================== --- aperture-webserver/trunk/WebContent/config/datasourcereport.jsp (rev 0) +++ aperture-webserver/trunk/WebContent/config/datasourcereport.jsp 2010-02-16 22:25:57 UTC (rev 2222) @@ -0,0 +1,140 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> +<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> +<%@page import="java.net.URLDecoder"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> +<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> +<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> +<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> +<%@page import="org.semanticdesktop.aperture.server.ApertureServer"%> +<%@page import="org.semanticdesktop.aperture.server.CrawlerStateBean"%> +<%@page import="org.semanticdesktop.aperture.server.DataSourceLog"%> +<%@page import="java.util.logging.Level"%> +<% +/** + * render the detail statistics of a datasource + * @author leo sauermann + */ + +// Authenticate +if (!ApertureServerServlet.authenticateAdministrator(request, 
response, getServletContext())) + return; + +// Get URI of datasource +String uriString = (String)request.getParameter("uri"); +if (uriString == null) { + throw new ServletException("parameter 'uri' identifying datasource missing"); +} +String uriDecoded = URLDecoder.decode(uriString,"UTF-8"); + +ApertureServer server = ApertureServerServlet.getApertureServer(getServletContext()); + +SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss"); + +DataSourceUIBean datasource = server.getDataSourceBean(uriDecoded); +if (datasource != null) + request.setAttribute("datasource", datasource); +CrawlerStateBean crawlerstate = server.getDetailedCrawlerState(uriDecoded); +if (crawlerstate != null) + request.setAttribute("crawlerstate", crawlerstate); +DataSourceLog dslog = server.getDataSourceLog(uriDecoded); +if (dslog != null) + request.setAttribute("dslog", crawlerstate); + +%> + +<%@page import="java.util.Collection"%> +<%@page import="java.util.logging.LogRecord"%> +<%@page import="java.util.Date"%> +<%@page import="java.text.DateFormat"%> +<%@page import="java.text.SimpleDateFormat"%> +<html> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> + <title>Reports for ${datasource.name}</title> + <link title="default" rel="stylesheet" type="text/css" + href="style.css" media="screen" /> +</head> +<body> +<% if (datasource == null) { %> + <center> + <h1>Data source not found</h1> + <h2><a href="index.jsp">Back to the sources list</a></h2> + </center> +<% } else { %> + <h1>Report of <a href="sourcedetails.jsp?uri=${datasource.dataSourceURI}">'${datasource.name}' data source</a></h1> + <ul> + <li><a href="#lastreport">Last crawl report</a></li> + <li><a href="#statistics">Statistics</a></li> + <li><a href="#logmessages">Log messages</a></li> + </ul> + <%if (datasource.getLastErrorMessage() != null) {%> + <h2>Error message</h2> + <table class="sourcedetails"> + <TR> + <TD class="errormsg"> + <pre>${bean.lastErrorMessage}</pre> + 
</TD> + </TR> + </table> + <%} %> + + <h2 id="lastreport">Last Crawl report</h2> + <table> + <TR> + <TD class="rowheader">Crawl started:</TD> + <td>${datasource.crawlStartedString}</td> + </TR> + <TR> + <TD class="rowheader">Crawl stopped:</TD> + <td>${datasource.crawlStoppedString}</td> + </TR> + <TR> + <TD class="rowheader">New objects:</TD> + <td>${datasource.newObjects}</td> + </TR> + <TR> + <TD class="rowheader">Changed objects:</TD> + <td>${datasource.modifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Unchanged objects:</TD> + <td>${datasource.unmodifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Removed objects:</TD> + <td>${datasource.deletedObjects}</td> + </TR> + </table> + + <h2 id="statistics">Statistics</h2> + <table> + <tr><td class="rowheader">resources</td><td>${datasource.accessDataSize} resources were crawled</td></tr> + <tr><td> </td><td><a href="datasourceaccess.jsp?uri=${datasource.dataSourceURI}">list all</a> <b>Attention</b>: may be huge</td></tr> + </table> + + + <h2 id="logmessages">Log Messages</h2> + <p> + <table width="100%" border="0"> + <tr><td>Level</td><td>Date</td><td>Message</td></tr> + <% + Collection<LogRecord> logs = dslog.read(100, Level.INFO); + for (LogRecord log : logs) { + request.setAttribute("log", log); + Date logdate = new Date(log.getMillis()); + request.setAttribute("logdate", dateformat.format(logdate)); + %> + <tr><td>${log.level}</td><td>${logdate}</td><td>${log.message}</td></tr> + <% + } + %> + </table> + </p> + + + <h2><a href="index.jsp">Back to the data sources list</a></h2> +<% datasource.dispose(); + } %> +</body> +</html> Property changes on: aperture-webserver/trunk/WebContent/config/datasourcereport.jsp ___________________________________________________________________ Added: svn:mergeinfo + Modified: aperture-webserver/trunk/WebContent/config/index.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/index.jsp 
2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/WebContent/config/index.jsp 2010-02-16 22:25:57 UTC (rev 2222) @@ -49,12 +49,12 @@ <h2>Currenly configured data sources.</h2> <table> <tr class="tableheader"> - <td>Name:</td> - <td>Type:</td> - <td>Status:</td> - <td>Last crawled:</td> - <td>Next Crawl</td> - <td colspan="3">Actions:</td> + <td>Name</td> + <td>Type</td> + <td>Status</td> + <td>Last crawled</td> + <td>Next crawl</td> + <td colspan="3">Actions</td> </tr> <c:forEach items="${sourcesList}" var="source"> <tr> Modified: aperture-webserver/trunk/WebContent/config/sourcedetails.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-02-16 22:25:57 UTC (rev 2222) @@ -70,7 +70,9 @@ </table> <%} %> - <h2>Crawl report</h2> + <h2>Reports</h2> + <a href="datasourcereport.jsp?uri=${bean.dataSourceURI}"><b>detailed report<b></a> + <h3>Crawl report</h3> <table > <TR> <TD class="rowheader">Crawl started:</TD> Deleted: aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/WebContent/config/sourcestatistics.jsp 2010-02-16 22:25:57 UTC (rev 2222) @@ -1,111 +0,0 @@ -<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> -<%@ taglib prefix="aperture" uri="http://nepomuk.semanticdesktop.org/datawrapper/aperture/taglib"%> -<%@page import="java.net.URLDecoder"%> -<%@page import="org.semanticdesktop.aperture.server.configui.bean.FresnelEditor"%> -<%@page import="org.semanticdesktop.aperture.server.configui.dao.DataSourceDAOFactory"%> -<%@page import="org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean"%> -<%@page 
import="org.semanticdesktop.aperture.server.configui.dao.DataSourceInstanceDAO"%> -<%@page import="org.semanticdesktop.aperture.servlet.ApertureServerServlet"%> -<%@page import="org.semanticdesktop.aperture.server.ApertureServer"%> -<%@page import="org.semanticdesktop.aperture.server.CrawlerStateBean"%> -<%@page import="org.semanticdesktop.aperture.server.DataSourceLog"%> - -<% -/** - * render the detail statistics of a datasource - * @author leo sauermann - */ - -// Authenticate -if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) - return; - -// Get URI of datasource -String uriString = (String)request.getParameter("uri"); -if (uriString == null) { - throw new ServletException("parameter 'uri' identifying datasource missing"); -} -String uriDecoded = URLDecoder.decode(uriString,"UTF-8"); - -ApertureServer server = ApertureServerServlet.getApertureServer(getServletContext()); -CrawlerStateBean crawlerstate = server.getDetailedCrawlerState(uriDecoded); -if (crawlerstate != null) - request.setAttribute("crawlerstate", crawlerstate); -DataSourceLog dslog = server.getDataSourceLog(uriDecoded); -%> - - -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <title>Details for ${bean.name}</title> - <link title="default" rel="stylesheet" type="text/css" - href="style.css" media="screen" /> -</head> -<body> -<% if (bean == null) { %> - <center> - <h1>Data source not found</h1> - <h2><a href="index.jsp">Back to the sources list</a></h2> - </center> -<% } else { %> - <h1>Details of '${bean.name}' data source</h1> - - <h2>Basic data source information</h2> - <%= fresnelEditor.createCommonFormPart() %> - <TR> - <TD class="rowheader">Status:</TD> - <aperture:statuscell state="${bean.crawlingState}"/> - </TR> - </table> - - <h2>Detailed data source configuration</h2> - - <%= fresnelEditor.createSpecificFormPart() %> - - <h2><aperture:crawlbutton bean="${bean}"/></h2> - - <%if 
(bean.getLastErrorMessage() != null) {%> - <h2>Error message</h2> - <table class="sourcedetails"> - <TR> - <TD class="errormsg"> - <pre>${bean.lastErrorMessage}</pre> - </TD> - </TR> - </table> - <%} %> - - <h2>Crawl report</h2> - <table > - <TR> - <TD class="rowheader">Crawl started:</TD> - <td class="configinput">${bean.crawlStartedString}</td> - </TR> - <TR> - <TD class="rowheader">Crawl stopped:</TD> - <td class="configinput">${bean.crawlStoppedString}</td> - </TR> - <TR> - <TD class="rowheader">New objects:</TD> - <td class="configinput">${bean.newObjects}</td> - </TR> - <TR> - <TD class="rowheader">Changed objects:</TD> - <td class="configinput">${bean.modifiedObjects}</td> - </TR> - <TR> - <TD class="rowheader">Unchanged objects:</TD> - <td class="configinput">${bean.unmodifiedObjects}</td> - </TR> - <TR> - <TD class="rowheader">Removed objects:</TD> - <td class="configinput">${bean.deletedObjects}</td> - </TR> - </table> - - <h2><a href="index.jsp">Back to the data sources list</a></h2> -<% bean.dispose(); - } %> -</body> -</html> Modified: aperture-webserver/trunk/WebContent/config/style.css =================================================================== --- aperture-webserver/trunk/WebContent/config/style.css 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/WebContent/config/style.css 2010-02-16 22:25:57 UTC (rev 2222) @@ -7,20 +7,20 @@ } body { - margin-left: 50px; - margin-right: 50px; - padding: 50px 38px 0 37px; + margin-left: 10px; + margin-right: 10px; + padding: 0px 0px 0px 0px; } h1 { - text-align: center; + text-align: left; font-size: 200%; } h2 { margin: 20px 0 15px 0; padding: 0; - text-align: center; + text-align: left; font-size: 130%; } @@ -37,10 +37,6 @@ margin-right:auto; } -table { - margin-left: auto; - margin-right: auto; -} table.sourcedetails { width: 590px @@ -53,7 +49,6 @@ tr.tableheader { font-weight:bold; - text-align:center; } td.rowheader { @@ -100,17 +95,3 @@ vertical-align: top; } -a:link { - 
text-decoration: none; - color: #CC0000; -} - -a:visited { - text-decoration: none; - color: #CC6666; -} - -a:hover { - text-decoration: underline; - color: #CC0000; -} \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -32,12 +32,12 @@ * Testing parameters * @param args */ - public static void main(String[] args) { + public static void main(String[] args) throws Exception { DrupalXmlRpcService service = new DrupalXmlRpcService( "localhost", "dac5b06a2e63eed9336ce24f5e56a181", "http://localhost/organikdrupal/?q=services/xmlrpc"); - if(service.connect() == true) + service.connect(); { service.login("root", "root"); try { @@ -134,15 +134,18 @@ public void connect() throws Exception { service = new DrupalXmlRpcService(domain, apikey, serviceUrl); - if(!service.connect()) - throw new Exception("Cannot connect to '"+serviceUrl+"'"); + service.connect(); service.login(username, password); } public void dispose() { if (service != null) { - service.logout(); + try { + service.logout(); + } catch (Exception e) { + // swallow + } service = null; } } @@ -193,8 +196,14 @@ RDFContainer data = object.getMetadata(); DrupalNode node = new DrupalNode(); node.setType(DrupalNode.TYPE_STORY); - node.setTitle(data.getString(NIE.title)); - node.setBody(data.getString(NIE.plainTextContent)); + String title = data.getString(NIE.title); + if (title == null) + title = object.getID().toString(); + node.setTitle(title); + String body = data.getString(NIE.plainTextContent); + if (body == null) + body = " "; + node.setBody(body); service.nodeSave(node); } catch (Exception 
x) { logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -1,8 +1,5 @@ package org.semanticdesktop.aperture.drupalhandler; - - - import java.security.InvalidKeyException; import java.security.NoSuchAlgorithmException; import java.util.logging.Level; @@ -18,257 +15,242 @@ import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; /** -* see http://drupal.org/node/632844 -* @author Aaron Moline <Aar...@mo...> -*/ + * DrupalXmlRpcService + * + * see http://drupal.org/node/632844, with adaptations by Leo Sauermann + * + * Changelog 16.2.2010 - made exceptions where exceptions are due, replaced bad + * logging with good logging JUL + * + * @author Aaron Moline <Aar...@mo...> + * @author Leo Sauermann <leo...@df...> + */ public class DrupalXmlRpcService { - // <editor-fold desc="Public Properties"> - public String ServiceURL; - public String ServiceDomain; - public String ApiKey; - public String Nonce; - public long TimeStamp; - public String APIHash; - public XmlRpcClient XmlService; - public boolean APIKey_Active; - // </editor-fold> + Logger log = Logger.getLogger(DrupalXmlRpcService.class.getName()); - // <editor-fold desc=" Constructors "> - public DrupalXmlRpcService(String serviceDomain, String apiKey, String serviceURL,boolean api_active) { - this(serviceDomain, apiKey, serviceURL); - this.APIKey_Active = api_active; + public String ServiceURL; + public String ServiceDomain; + public String ApiKey; + public String Nonce; + public long TimeStamp; + public String APIHash; + public 
XmlRpcClient XmlService; + public boolean APIKey_Active; - } + public DrupalXmlRpcService(String serviceDomain, String apiKey, + String serviceURL, boolean api_active) { + this(serviceDomain, apiKey, serviceURL); + this.APIKey_Active = api_active; - public DrupalXmlRpcService(String serviceDomain, String apiKey, String serviceURL) { - this.ServiceDomain = serviceDomain; - this.ApiKey = apiKey; - this.ServiceURL = serviceURL; - this.APIKey_Active = true; - } - // </editor-fold> + } - // <editor-fold defaultstate="collapsed" desc="Private Properties"> - private String SessionID; - // </editor-fold> + public DrupalXmlRpcService(String serviceDomain, String apiKey, + String serviceURL) { + this.ServiceDomain = serviceDomain; + this.ApiKey = apiKey; + this.ServiceURL = serviceURL; + this.APIKey_Active = true; + } - // <editor-fold defaultstate="collapsed" desc="Private Methods"> - private String GetNonce()/*(int length)*/ { - /* - * //TODO:Get None Generator Working - String allowedCharacters = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; - StringBuilder password = new StringBuilder(); + private String SessionID; - Random rand = new Random(); - for (int i = 0; i < length; i++) - { - password.append() - //password.append(append); - } + private String getNonce()/* (int length) */{ + /* + * //TODO:Get None Generator Working String allowedCharacters = + * "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; + * StringBuilder password = new StringBuilder(); + * + * Random rand = new Random(); for (int i = 0; i < length; i++) { + * password.append() //password.append(append); } + * + * return password.toString(); + */ + return "" + System.currentTimeMillis(); + } - return password.toString(); - * */ - return ""+System.currentTimeMillis(); - } - /* - * - * @drupalServiceCommand - * - */ - private void intializeHash(DrupalServiceCommands drupalServiceCommand) { - this.Nonce = GetNonce(); - this.TimeStamp = System.currentTimeMillis(); - String hashstring 
= GetHashString(drupalServiceCommand.toString()); - this.APIHash = GetHMAC(hashstring); - } + /* + * + * @drupalServiceCommand + * + */ + private void intializeHash(DrupalServiceCommands drupalServiceCommand) + throws Exception { + this.Nonce = getNonce(); + this.TimeStamp = System.currentTimeMillis(); + String hashstring = getHashString(drupalServiceCommand.toString()); + this.APIHash = getHMAC(hashstring); + } - private void IntializeService() { - try { + private void intializeService() throws Exception { + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + config.setServerURL(new URL(this.ServiceURL)); + XmlService = new XmlRpcClient(); + XmlService.setConfig(config); + } - XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); - config.setServerURL(new URL(this.ServiceURL)); - XmlService = new XmlRpcClient(); - XmlService.setConfig(config); + private String getHashString(String serviccmd) { + StringBuilder sb = new StringBuilder(); + sb.append("" + System.currentTimeMillis()); // Time stamp + sb.append(";"); + sb.append(this.ServiceDomain);// Service Domain + sb.append(";"); + sb.append("" + System.currentTimeMillis()); // Nonce + sb.append(";"); + sb.append(serviccmd); // Service command + log.finest("Created GetHashString: " + sb.toString()); + return sb.toString(); + } - } catch (Exception e) { - e.printStackTrace(); - } - } + private Vector getDefaultParams() { + Vector params = new Vector(); + // Drupal is setup to use Service Keys, then add the following. 
+ if (this.APIKey_Active) { + params.add(this.APIHash); + params.add(this.ServiceDomain); + params.add("" + this.TimeStamp); + params.add(this.Nonce); + params.add(this.SessionID); + } + return params; + } - private String GetHashString(String serviccmd) { - StringBuilder sb = new StringBuilder(); - sb.append(""+System.currentTimeMillis()); // Time stamp - sb.append(";"); - sb.append(this.ServiceDomain);//Service Domain - sb.append(";"); - sb.append(""+System.currentTimeMillis()); //Nonce - sb.append(";"); - sb.append(serviccmd); //Service command - Debug.println("Created GetHashString: ",sb.toString()); - return sb.toString(); - } + // </editor-fold> - private Vector GetDefaultParams() { - Vector params = new Vector(); - //Drupal is setup to use Service Keys, then add the following. - if(this.APIKey_Active) - { - params.add(this.APIHash); - params.add(this.ServiceDomain); - params.add(""+this.TimeStamp); - params.add(this.Nonce); - params.add(this.SessionID); - } - return params; - } - // </editor-fold> + // <editor-fold defaultstate="collapsed" desc="Public Methods "> + public String getHMAC(String message) throws Exception { - // <editor-fold defaultstate="collapsed" desc="Public Methods "> - public String GetHMAC(String message) { + Mac mac; + try { + Charset csets = Charset.forName("US-ASCII"); + SecretKeySpec keySpec = new javax.crypto.spec.SecretKeySpec(csets + .encode(this.ApiKey).array(), "HmacSHA256"); + mac = javax.crypto.Mac.getInstance("HmacsSHA256"); + mac.init(keySpec); + byte[] hash = mac.doFinal(csets.encode(message).array()); - Mac mac; - try { - Charset csets = Charset.forName("US-ASCII"); - SecretKeySpec keySpec = new javax.crypto.spec.SecretKeySpec(csets.encode(this.ApiKey).array(), "HmacSHA256"); - mac = javax.crypto.Mac.getInstance("HmacSHA256"); - mac.init(keySpec); - byte[] hash = mac.doFinal(csets.encode(message).array()); + String result = ""; + for (int i = 0; i < hash.length; i++) { + result += Integer.toString((hash[i] & 0xff) + 0x100, 
16) + .substring(1); + } - String result = ""; - for (int i = 0; i < hash.length; i++) { - result += Integer.toString((hash[i] & 0xff) + 0x100, 16).substring(1); - } + log.finest("Created HMAC: " + result); + return result; - Debug.println("Created HMAC: ", result); - return result; + } catch (Exception ex) { + log.log(Level.SEVERE, ex.getLocalizedMessage(), ex); + throw ex; + } - } catch (InvalidKeyException ex) { - Logger.getLogger(DrupalXmlRpcService.class.getName()).log(Level.SEVERE, null, ex); - throw new RuntimeException(ex); - } catch (NoSuchAlgorithmException ex) { - Logger.getLogger(DrupalXmlRpcService.class.getName()).log(Level.SEVERE, null, ex); - throw new RuntimeException(ex); - } + } + /** + * Connect to the remote service + * + * @return + * @throws Exception + */ + public void connect() throws Exception { + // Intialize Hash + intializeHash(DrupalServiceCommands.SystemConnect); - } + // Intialize Service + intializeService(); - public boolean connect() { - try { + Debug.println("XmlService Service Intialized", " "); - //Intialize Hash - intializeHash(DrupalServiceCommands.SystemConnect); + try { + HashMap map = (HashMap) XmlService + .execute(DrupalServiceCommands.SystemConnect.toString(), + new Object[] {}); + + this.SessionID = (String) map.get("sessid"); - //Intialize Service - IntializeService(); + log.finest("Conn SessionID: " + this.SessionID); + } catch (Exception x) { + throw new Exception("cannot connect to "+ServiceURL+": "+x.getMessage(),x); + } - Debug.println("XmlService Service Intialized"," "); - HashMap map = (HashMap)XmlService.execute(DrupalServiceCommands.SystemConnect.toString(), new Object[]{}); + } - this.SessionID = (String)map.get("sessid"); + public boolean login(String username, String password) { + try { + intializeHash(DrupalServiceCommands.UserLogin); - Debug.println("Conn SessionID: ", this.SessionID); + Vector params = getDefaultParams(); + // Add Login Paramaters + params.add(username); + params.add(password); - 
return true; + HashMap o = (HashMap) XmlService.execute( + DrupalServiceCommands.UserLogin.toString(), params); + if (!o.isEmpty()) { + if (o.containsKey(username) && o.containsKey(password)) { + // confirmLogin(HashMap loginValue, String username, String + // password); + } + } + this.SessionID = (String) o.get("sessid"); - } catch (Exception e) { - e.printStackTrace(); - return false; - } - } + Debug.println("Successfull Login:", o.toString()); + return true; + } catch (Exception e) { + log.log(Level.WARNING, e.getMessage(), e); + } + return false; + } - public boolean login(String username, String password) { - try { - intializeHash(DrupalServiceCommands.UserLogin); + /* + * public void Login(String username, String password) { try { + * IntializeHash(DrupalServiceCommands.UserLogin); + * + * + * Vector params = GetDefaultParams(); //Add Login Paramaters + * params.add(username); params.add(password); + * + * HashMap o = + * (HashMap)XmlService.execute(DrupalServiceCommands.UserLogin.toString(), + * params); + * + * this.SessionID = (String)o.get("sessid"); + * + * Debug.println("Successfull Login:", o.toString()); } catch (Exception e) { + * System.out.println(e.toString()); } } + */ - Vector params = GetDefaultParams(); - //Add Login Paramaters - params.add(username); - params.add(password); + public boolean logout() throws Exception { + intializeHash(DrupalServiceCommands.UserLogout); + Vector params = getDefaultParams(); + params.add(this.SessionID); + Object o = XmlService.execute(DrupalServiceCommands.UserLogout + .toString(), params); - HashMap o = (HashMap)XmlService.execute(DrupalServiceCommands.UserLogin.toString(), params); - if(!o.isEmpty()) { - if(o.containsKey(username) && o.containsKey(password)) { - //confirmLogin(HashMap loginValue, String username, String password); - } - } - this.SessionID = (String)o.get("sessid"); + Debug.println("Logout Sucessfull:", o.toString()); + return true; + } - Debug.println("Successfull Login:", o.toString()); - 
return true; - } catch (Exception e) { - e.printStackTrace(); - } - return false; - } - /* - public void Login(String username, String password) { - try { - IntializeHash(DrupalServiceCommands.UserLogin); + // Testing New Things Not valid + public void testFileSave() throws Exception { + intializeHash(DrupalServiceCommands.FileSave); + byte[] Filebyte = new byte[10]; + Vector params = getDefaultParams(); + params.add(Filebyte); + Object o = XmlService.execute(DrupalServiceCommands.FileSave + .toString(), params); + Debug.println("Test Sucessfull:", o.toString()); + } - Vector params = GetDefaultParams(); - //Add Login Paramaters - params.add(username); - params.add(password); - - HashMap o = (HashMap)XmlService.execute(DrupalServiceCommands.UserLogin.toString(), params); - - this.SessionID = (String)o.get("sessid"); - - Debug.println("Successfull Login:", o.toString()); - - } catch (Exception e) { - System.out.println(e.toString()); - } - } -*/ - - public boolean logout() { - - try { - intializeHash(DrupalServiceCommands.UserLogout); - Vector params = GetDefaultParams(); - params.add(this.SessionID); - Object o = XmlService.execute(DrupalServiceCommands.UserLogout.toString(), params); - - Debug.println("Logout Sucessfull:",o.toString()); - return true; - } catch (Exception e) { - System.out.println(e.toString()); - return false; - } - } - - //Testing New Things Not valid - public void testFileSave() { - try { - - intializeHash(DrupalServiceCommands.FileSave); - byte[] Filebyte = new byte[10]; - Vector params = GetDefaultParams(); - params.add(Filebyte); - Object o = XmlService.execute(DrupalServiceCommands.FileSave.toString(), params); - Debug.println("Test Sucessfull:", o.toString()); - } catch (Exception e) { - System.out.println(e.toString()); - } - } - public Object nodeSave(DrupalNode node) throws Exception { - try { - intializeHash(DrupalServiceCommands.NodeSave); - Vector params = GetDefaultParams(); - params.add(node); - Object o = 
XmlService.execute(DrupalServiceCommands.NodeSave.toString(), params); - Debug.println("Test Sucessfull:", o.toString()); - return o; - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - + intializeHash(DrupalServiceCommands.NodeSave); + Vector params = getDefaultParams(); + params.add(node); + Object o = XmlService.execute( + DrupalServiceCommands.NodeSave.toString(), params); + Debug.println("Test Sucessfull:", o.toString()); + return o; + } } \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -38,6 +38,7 @@ import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.datasource.DataSourceFactory; import org.semanticdesktop.aperture.detector.DataSourceDescription; +import org.semanticdesktop.aperture.server.configui.bean.DataSourceUIBean; import org.semanticdesktop.aperture.vocabulary.NIE; /** @@ -526,6 +527,15 @@ throws ApertureServerException; /** + * Get the log messages of this datasource + * @return previous log messages of a datasource, as recorded by the CrawlerHandler. + * @throws ApertureServerException if the source under the given URI + * has not been found or something else goes wrong + */ + public DataSourceLog getDataSourceLog(String dataSourceUri) + throws ApertureServerException; + + /** * Returns the URI that was last crawled by this aperture server. * @return The URI of a data item that was last crawled by this aperture * server. 
@@ -604,4 +614,10 @@ * @return true, if the user is an authenticated administrator */ public boolean authenticateAdministrator(String username, String password); + + /** + * @param dataSourceUri the uri of the data source to be retrieved. + * @return A single data source bean. + */ + public DataSourceUIBean getDataSourceBean(String dataSourceUri); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/DataSourceLog.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -3,14 +3,19 @@ */ package org.semanticdesktop.aperture.server; +import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; import java.io.PrintStream; import java.io.StringReader; import java.util.Collection; import java.util.LinkedList; +import java.util.logging.Formatter; +import java.util.logging.Handler; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; @@ -46,8 +51,21 @@ * * So the solution for this problem, using off-the-shelf software, is to use the JUL logger * and the XMLFormatter, as this log format is at least documented and parsable. - * I have to write the parser myself, though. + * BUT: Oh pots of gold, there is no parser for the "standard" xml format created by JUL. + * How standard is that? + * I have to write the parser myself. * + * BUT the XMLFormatter is from hell and parsing XML files the same. the output format throws an error: + * "xml document structures must start and end with the same entity", + * probably because the XMLFormatter didn't close() yet. 
+ * So XML is not the right format, its not standard and I can't parse it. + * Hence see with what I had to come up with. + * <code> + * LEVEL;MILLIS;"MESSAGE" + * </code> + * + * replaces: \n with \\n, " with ', \r with nothing. + * * Note: I tried to expose only a slf4j logger, and use the JUL Logger only internally, * but the bastards from SLF4J made the constructor new JDK14LoggerAdapter(loggerJUL) private, * so this "oh so standardized and properly programmed" library cannot be used to wrap a JUL logger. @@ -62,7 +80,7 @@ */ static final Level TRESHOLD = Level.CONFIG; - Logger loggerJUL; + final Logger loggerJUL; /** * String identifying the datasource to which this log belongs @@ -77,51 +95,78 @@ /** * Output log formatter */ - XMLFormatter xmlformatter; + Formatter formatter = new ApertureDSLogFormatter(); + /** * output log file handler */ - StreamHandler xmlfilehandler; + StreamHandler filestreamhandler; /** - * A printstream to autoflush, because the xmlfilestream does not have this + * A printstream to autoflush, because the filestreamhandler does not have this */ - PrintStream xmlfilestreamflusher; + PrintStream filestreamflusher; /** * the file to write to */ - FileOutputStream xmlfilestream; + FileOutputStream filestream; - /** - * A dummy entity resolver to get around the problem that every xml parsing - * involves dereferencing DTDs - */ - final static EntityResolver dummyEntityResolver = new EntityResolver() { - public InputSource resolveEntity(String publicID, String systemID) - throws SAXException { - return new InputSource(new StringReader("")); - } - }; + + private static class ApertureDSLogFormatter extends Formatter { + + @Override + public String format(LogRecord record) { + String msg = record.getMessage(); + if (msg==null) + msg = ""; + else + { + msg.replace("\n", "\\n"); + msg.replace("\r", ""); // windows CR+LF + msg.replace("\"", "'"); + } + String result = + record.getLevel().getName() + ";" + + record.getMillis()+";\""+msg+"\"" + + 
"\n"; + ; + + return result; + } + + } + + static class FlushingFileStreamHandler extends StreamHandler { + + public FlushingFileStreamHandler(OutputStream out, Formatter formatter) { + super(out, formatter); + } + + /** + * yep, its ugly, but flushing from the outside of this would be even worse. + */ + @Override + public synchronized void publish(LogRecord record) { + super.publish(record); + flush(); + } + + } + + + + /** - * Oh pots of gold, there is no parser for the "standard" xml format created by JUL. - * How standard is that? - * * This class has a serious flaw: when the log file gets BIG, it has to load the whole log * file into memory, which is a bit stupid. I am not going to do that better, if you want, * go ahead. * - * According to - * <a href="http://java.sun.com/j2se/1.4.2/docs/guide/util/logging/overview.html#3.0">DTD</a>, - * this is to do: - * date, millis, sequence, logger?, level, - * class?, method?, thread?, message, key?, catalog?, param*, exception? * * @author sauermann - * */ - private class LogParser extends DefaultHandler { + private class LogParser { @@ -130,12 +175,6 @@ */ @SuppressWarnings("unchecked") final Collection buffer; - - LogRecord cur = null; - - StringBuffer curValue = null; - String curException = null; - final Level minLevel; public LogParser(int length, Level minLevel) { @@ -149,118 +188,47 @@ @SuppressWarnings("unchecked") public Collection<LogRecord> parse(File file) throws Exception { - /* - SAXParserFactory spf = SAXParserFactory.newInstance(); - spf.setValidating(false); - spf.setXIncludeAware(false); - SAXParser parser = spf.newSAXParser(); - parser.getXMLReader().setEntityResolver(dummyEntityResolver); - parser.getXMLReader().setContentHandler(this); - //reader.setErrorHandler(new MyErrorHandler()); - parser.parse(file, this); - */ - XMLReader reader = XMLReaderFactory.createXMLReader(); - reader.setErrorHandler(new ErrorHandler() { - - public void error(SAXParseException exception) - throws SAXException { - // 
TODO Auto-generated method stub + + FileInputStream fin = new FileInputStream(file); + try { + BufferedReader reader = new BufferedReader(new InputStreamReader(fin)); + String in; + do { + in = reader.readLine(); + if (in==null) + continue; - } - - public void fatalError(SAXParseException exception) - throws SAXException { - // TODO Auto-generated method stub + try { + LogRecord cur = new LogRecord(Level.ALL, ""); + + // parse line, yep, this is by hand + int i = in.indexOf(";"); + Level lev = Level.parse(in.substring(0,i)); + cur.setLevel(lev); + int j = in.indexOf(";", i+1); + long millis = Long.parseLong(in.substring(i+1, j)); + cur.setMillis(millis); + if (in.charAt(j+1)!='\"') + throw new Exception("message must start with \""); + String msg = in.substring(j+2, in.length()-1); + cur.setMessage(msg); + buffer.add(cur); + + } catch (Exception x) { + // swallow. it is important that something shows. + System.err.println("Error parsing line "+in); + x.printStackTrace(); + } - } - - public void warning(SAXParseException exception) - throws SAXException { - // TODO Auto-generated method stub - } - - }); - reader.setEntityResolver(dummyEntityResolver); - reader.setContentHandler(this); - FileInputStream fin = new FileInputStream(file); - try { - reader.parse(new InputSource(fin)); + + } while (in != null); } finally { fin.close(); } return (Collection<LogRecord>)buffer; } - - @Override - public void endElement(String uri, String localName, String name) - throws SAXException { - if ("record".equals(localName)) { - // add the current element to the buffer - if (cur == null) - throw new SAXException("closing 'record' was missing an opening 'record'"); - // only return those above the required log level - if (cur.getLevel().intValue() >= minLevel.intValue()) - buffer.add(cur); - cur = null; - curValue = null; - curException = null; - } else { - if (curValue == null) // empty - return; - if (cur == null) - throw new SAXException("handling close of '"+localName+"': missing 
open element 'record'"); - - // trim curValue, this is always a problem with newlines - String val = curValue.toString().trim(); - - // "date" - not parse it, its not in the logrecord - // "sequence" - not - // class?, method?, thread? - not - // key?, catalog?, param* - not - if ("millis".equals(localName)) { - long millis = Long.parseLong(val); - cur.setMillis(millis); - } else if ("logger".equals(localName)) { - cur.setLoggerName(val); - } else if ("level".equals(localName)) { - cur.setLevel(Level.parse(val)); - } else if ("message".equals(localName)) { - // are we inside an exception? - if (curException != null) - curException = val; - else - cur.setMessage(val); - } else if ("exception".equals(localName)) - curException = null; - - // the curValue was read, free it - curValue = null; - } - - } - - @Override - public void characters(char[] ch, int start, int length) - throws SAXException { - if (curValue == null) - curValue = new StringBuffer(new String(ch, start, length)); - else - curValue.append(ch, start, length); - } - - /** - * Parse according to - */ - @Override - public void startElement(String uri, String localName, String name, - Attributes attributes) throws SAXException { - if ("record".equals(localName)) - cur = new LogRecord(Level.ALL, ""); - else if ("exception".equals(localName)) - curException = ""; - } } /** @@ -275,25 +243,25 @@ this.logfile = logfile; // create the JUL Logger loggerJUL = Logger.getLogger(getClass().getName()+"."+dataSourceUri); - // add handler - xmlformatter = new XMLFormatter(); - xmlfilestream = new FileOutputStream(logfile, true); - xmlfilestreamflusher = new PrintStream(xmlfilestream, true); - xmlfilehandler = new StreamHandler(xmlfilestreamflusher, xmlformatter) { - - /** - * yep, its ugly, but flushing from the outside of this would be even worse. 
- */ - @Override - public synchronized void publish(LogRecord record) { - super.publish(record); - flush(); - } - - }; - loggerJUL.addHandler(xmlfilehandler); + // NOTE: in rare occasions, this logger can be attached multiple times. + // For example, this happens when Tomcat restarts during debugging. + // Then its good to remove the old handler by other instances of this class, + // otherwise the messages are written twice. + // It should be taken care of by the finalizer, but who trusts that. + // Don't remove this - look up - the logger has its own name, containing the datasourceuri + // - so this is really only on weird occasions. + // + for (Handler handler: loggerJUL.getHandlers()) { + if (handler instanceof FlushingFileStreamHandler) + loggerJUL.removeHandler(handler); + } + // add our handler + filestream = new FileOutputStream(logfile, true); + filestreamflusher = new PrintStream(filestream, true); + filestreamhandler = new FlushingFileStreamHandler(filestreamflusher, formatter); + loggerJUL.addHandler(filestreamhandler); loggerJUL.setLevel(TRESHOLD); // to be safe, set on both - xmlfilehandler.setLevel(TRESHOLD); // to be safe, set on both + filestreamhandler.setLevel(TRESHOLD); // to be safe, set on both } @@ -322,13 +290,21 @@ @Override protected void finalize() throws Throwable { - if (xmlfilestream != null) { + if (filestreamhandler != null) { + loggerJUL.removeHandler(filestreamhandler); + filestreamhandler = null; + } + if (filestreamflusher != null) { + filestreamflusher.close(); + filestreamflusher = null; + } + if (filestream != null) { try { - xmlfilestream.close(); + filestream.close(); } catch (Exception x) { // swallow } - xmlfilestream = null; + filestream = null; } super.finalize(); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java =================================================================== --- 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/bean/DataSourceUIBean.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -50,6 +50,7 @@ import org.ontoware.rdf2go.model.Syntax; import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.vocabulary.RDF; +import org.semanticdesktop.aperture.accessor.AccessData; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.server.CrawlerStateBean; @@ -91,6 +92,11 @@ /** Last error message (or null if no error detected) */ private String lastErrorMessage; + + /** + * only for reading the size. + */ + private AccessData accessData; //////////////////////////////////////////////////////////////////////// ///////////////////////////// CONSTRUCTOR ////////////////////////////// @@ -104,6 +110,7 @@ * @param crawlerStateBean The bean containing the current state of this * data source. * @param syntax The serialization syntax used. + * @param accessData only for reading the size. * @param ontologyDAO The ontologyDAO used to interact with the data source * ontology. * @throws DataWrapperUIException if the initialization fails @@ -113,6 +120,7 @@ String configuration, CrawlerStateBean crawlerStateBean, Syntax syntax, + AccessData accessData, DataSourceOntologyDAO ontologyDAO) throws DataWrapperUIException { Model model = null; @@ -139,6 +147,7 @@ "Couldn't initialize the data source bean", ioe); } + this.accessData = accessData; } //////////////////////////////////////////////////////////////////////// @@ -373,6 +382,59 @@ } /** + * Statistics: get the size of the accessdata + * @return the size of the access data. 
+ */ + public int getAccessDataSize() { + return accessData.getSize(); + } + + /** + * Statistics: get the ids of the access datas + * @return the set of ids (Strings) from accessdata. Note that this can get massively large + */ + public Set getAccessDataIDSet() { + return accessData.getStoredIDs(); + } + + /** + * Return the last access date for this ID + * @param id the ID to check + * @return the last access value. if the value is unparseable, 0 + * @throws IndexOutOfBoundsException when the ID has no last access value + */ + public long getAccessDataLastAccess(String id) + throws IndexOutOfBoundsException { + String val = accessData.get(id, AccessData.DATE_KEY); + if (val == null) + throw new IndexOutOfBoundsException("no last access found for "+id); + try { + return Long.parseLong(val); + } catch (Exception x) { + log.fine("cannot read value '"+val+"': "+x); + return 0; + } + } + + /** + * Return the byte size for this ID + * @param id the ID to check + * @return the byte size. if the value is unparseable, 0 + * @throws IndexOutOfBoundsException when the ID has no last access value + */ + public long getAccessDataByteSize(String id) + throws IndexOutOfBoundsException { + String val = accessData.get(id, AccessData.BYTE_SIZE_KEY); + if (val == null) + throw new IndexOutOfBoundsException("no byte size found for "+id); + try { + return Long.parseLong(val); + } catch (Exception x) { + log.fine("cannot read value '"+val+"': "+x); + return 0; + } + } + /** * Releases all system resources owned by this data source ui bean. 
*/ public void dispose() { Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceInstanceDAO.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceInstanceDAO.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/configui/dao/DataSourceInstanceDAO.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -96,18 +96,7 @@ for (String uri : uriList) { try { - String configuration = wrapper.getDataSourceConfig( - uri, - Syntax.Ntriples.getMimeType()); - CrawlerStateBean crawlerStateBean = wrapper - .getDetailedCrawlerState(uri); - DataSourceUIBean bean = new DataSourceUIBean( - uri, - configuration, - crawlerStateBean, - Syntax.Ntriples, - ontologyDAO); - result.add(bean); + result.add(wrapper.getDataSourceBean(uri)); } catch (Exception e) { log.log(Level.SEVERE,"Couldn't create a data source bean",e); } @@ -120,23 +109,7 @@ * @return A single data source bean. 
*/ public DataSourceUIBean getDataSourceBean(String dataSourceUri) { - DataSourceUIBean bean = null; - try { - String configuration = wrapper.getDataSourceConfig( - dataSourceUri, - Syntax.Ntriples.getMimeType()); - CrawlerStateBean crawlerStateBean = wrapper - .getDetailedCrawlerState(dataSourceUri); - bean = new DataSourceUIBean( - dataSourceUri, - configuration, - crawlerStateBean, - Syntax.Ntriples, - ontologyDAO); - } catch (Exception e) { - log.log(Level.SEVERE,"Couldn't create a data source bean",e); - } - return bean; + return wrapper.getDataSourceBean(dataSourceUri); } /** Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourceInformation.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourceInformation.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourceInformation.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -41,6 +41,7 @@ import org.semanticdesktop.aperture.crawler.Crawler; import org.semanticdesktop.aperture.crawler.ExitCode; import org.semanticdesktop.aperture.server.CrawlerStateBean; +import org.semanticdesktop.aperture.server.DataSourceLog; import org.semanticdesktop.aperture.server.STATE; import org.semanticdesktop.aperture.server.impl.ServerImpl; @@ -71,6 +72,16 @@ /** The path to a AccessData file */ private File accessDataFile; + /** + * The path to the logfile + */ + private File logfile; + + /** + * The data source logger + */ + private DataSourceLog datasourcelog; + /** Timeout between two consecutive crawls */ private long timeout; @@ -208,6 +219,10 @@ */ public void setAccessDataFile(File accessDataFile) { this.accessDataFile = accessDataFile; + + // TODO: replace the whole configuration way. put all stuff of a datasource into one folder, + // including the config. this will make things a lot easier. 
+ this.logfile = new File(accessDataFile.toString()+".logfile"); } /** @return the interval between crawls */ @@ -263,4 +278,15 @@ throw new IOException("Wrong state of object after deserialisation"); } } + + /** + * Get the DataSourceLog. This can go wrong if the log file can't be created. + * @return the data source log file. + * @throws Exception if the file cannot be opened + */ + public DataSourceLog getDataSourceLog() throws Exception { + if (datasourcelog == null) + datasourcelog = new DataSourceLog(uriString, logfile); + return datasourcelog; + } } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java 2010-02-16 19:07:24 UTC (rev 2221) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/datasource/DataSourcePool.java 2010-02-16 22:25:57 UTC (rev 2222) @@ -74,11 +74,11 @@ import org.semanticdesktop.aperture.server.ApertureServerException; import org.semanticdesktop.aperture.server.CrawlerStateBean; import org.semanticdesktop.aperture.server.DataSourceChangedEvent; +import org.semanticdesktop.aperture.server.DataSourceLog; import org.semanticdesktop.aperture.server.STATE; import org.semanticdesktop.aperture.server.exception.DataSourceAlreadyPresentException; import org.semanticdesktop.aperture.server.exception.DataSourceNotFoundException; import org.semanticdesktop.aperture.server.exception.DataSourceNotLockedException; -import org.semanticdesktop.aperture.server.impl.ServerCrawlerHandler; import org.semanticdesktop.aperture.server.impl.ServerImpl; import org.semanticdesktop.aperture.server.thread.CrawlingThreadPoo... [truncated message content] |
From: <leo...@us...> - 2010-03-23 19:15:06
|
Revision: 2298 http://aperture.svn.sourceforge.net/aperture/?rev=2298&view=rev Author: leo_sauermann Date: 2010-03-23 19:14:24 +0000 (Tue, 23 Mar 2010) Log Message: ----------- aperture-webserver: added a REST servlet to upload configuration values. started with the crawlerhandler config. kind of works. Modified Paths: -------------- aperture-webserver/trunk/WebContent/WEB-INF/web.xml aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/STATE.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java Added Paths: ----------- aperture-webserver/trunk/WebContent/WEB-INF/config/ aperture-webserver/trunk/WebContent/WEB-INF/config/serverconfig.trig aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ConfigRESTServlet.java Added: aperture-webserver/trunk/WebContent/WEB-INF/config/serverconfig.trig =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/config/serverconfig.trig (rev 0) +++ aperture-webserver/trunk/WebContent/WEB-INF/config/serverconfig.trig 2010-03-23 19:14:24 UTC (rev 2298) @@ -0,0 +1,21 @@ +# NOTE: this is an initial file. It can be deleted and it will be overwritten by the web server +# + +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +@prefix sourceformat: <http://aperture.semanticdesktop.org/ontology/sourceformat#> . 
+@prefix source: <http://aperture.semanticdesktop.org/ontology/2007/08/12/source#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix drupalcw: <http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#> . + +<urn:aperture:server:crawlerhandlercontext> { + <urn:aperture:server:crawlerhandler> a drupalcw:DrupalCrawlerHandler; + drupalcw:configDomain "localhost"; + drupalcw:configApikey "dac5b06a2e63eed9336ce24f5e56a181"; + drupalcw:configXMLRPCServiceUrl "http://localhost/organikdrupal/?q=services/xmlrpc"; + drupalcw:configUsername "root"; + drupalcw:configPassword "root". +} + + Modified: aperture-webserver/trunk/WebContent/WEB-INF/web.xml =================================================================== --- aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/WebContent/WEB-INF/web.xml 2010-03-23 19:14:24 UTC (rev 2298) @@ -83,6 +83,15 @@ <description>Sets, whether the servlet supports vendor extensions for XML-RPC.</description> </init-param> </servlet> + <servlet> + <description> + Allows remote clients to configure this aperture-webserver via a simple HTTP protocol</description> + <display-name> + ConfigRESTServlet</display-name> + <servlet-name>ConfigRESTServlet</servlet-name> + <servlet-class> + org.semanticdesktop.aperture.servlet.ConfigRESTServlet</servlet-class> + </servlet> <servlet-mapping> <servlet-name>XmlRpcServlet</servlet-name> <url-pattern>/xmlrpc</url-pattern> @@ -115,6 +124,10 @@ <servlet-name>RefreshResourceServlet</servlet-name> <url-pattern>/config/refreshResource</url-pattern> </servlet-mapping> + <servlet-mapping> + <servlet-name>ConfigRESTServlet</servlet-name> + <url-pattern>/config/api/*</url-pattern> + </servlet-mapping> <welcome-file-list> <welcome-file>index.jsp</welcome-file> </welcome-file-list> Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java 
=================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -0,0 +1,24 @@ +/** + * + */ +package org.semanticdesktop.aperture.drupalhandler; + +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; + +/** + * Configuration constants from RDF. + * This should be auto-generated, dunno how and too lazy to do it right now. + * @author sauermann + * + */ +public class DRUPALCRAWLERHANDLERCONFIG { + + public static final URI DrupalCrawlerHandler = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#DrupalCrawlerHandler"); + public static final URI configDomain = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configDomain"); + public static final URI configApikey = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configApikey"); + public static final URI configUsername = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configUsername"); + public static final URI configPassword = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configPassword"); + public static final URI configXMLRPCServiceUrl = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configXMLRPCServiceUrl"); + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native Modified: 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -6,6 +6,13 @@ import java.util.Date; import java.util.Set; +import org.ontoware.aifbcommons.collection.ClosableIterator; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.Variable; +import org.ontoware.rdf2go.util.RDFTool; +import org.ontoware.rdf2go.vocabulary.RDF; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.crawler.Crawler; import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; @@ -89,20 +96,49 @@ DrupalXmlRpcService service; - public DrupalCrawlerHandler(ApertureRegistriesAggregate registries) { + /** + * Create the crawlerhandler and read the configuration. + * Do not access the configmodel after the constructor is finished, rather copy everything you need from the model. + * @param registries the registries to use + * @param configModel the configmodel with settings. 
Do not access it after the constructor is finished + */ + public DrupalCrawlerHandler(ApertureRegistriesAggregate registries, Model configModel) throws Exception { super( // TODO - use something cleverer here new MagicMimeTypeIdentifier(), registries.getExtractorRegistry(), null); - this.domain = "localhost"; - this.apikey = "dac5b06a2e63eed9336ce24f5e56a181"; - this.serviceUrl = "http://localhost/organikdrupal/?q=services/xmlrpc"; - this.username = "root"; - this.password = "root"; - setExtractingContents(true); + readConfiguration(configModel); } + /** + * during construction: read the config + * @param configModel + */ + private void readConfiguration(Model configModel) throws Exception { + // read values from model + Resource configR; + ClosableIterator<Statement> it = configModel.findStatements(Variable.ANY, RDF.type, DRUPALCRAWLERHANDLERCONFIG.DrupalCrawlerHandler); + try { + if (!it.hasNext()) + throw new Exception("cannot find instance of RDF:type "+DRUPALCRAWLERHANDLERCONFIG.DrupalCrawlerHandler+" in configmodel"); + configR = it.next().getSubject(); + } finally { + it.close(); + } + + this.domain = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configDomain); + this.apikey = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configApikey); + this.serviceUrl = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configXMLRPCServiceUrl); + this.username = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configUsername); + this.password = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configPassword); + + // TODO: check config? 
+ + setExtractingContents(true); + } + + private static MimeTypeIdentifier getMimeTypeIdentifier( ApertureRegistriesAggregate apertureRegistries) { MimeTypeIdentifierRegistry registry Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -115,9 +115,6 @@ return params; } - // </editor-fold> - - // <editor-fold defaultstate="collapsed" desc="Public Methods "> public String getHMAC(String message) throws Exception { Mac mac; Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl 2010-03-23 19:14:24 UTC (rev 2298) @@ -0,0 +1,45 @@ +@prefix fresnel: <http://www.w3.org/2004/09/fresnel#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +@prefix sourceformat: <http://aperture.semanticdesktop.org/ontology/sourceformat#> . +@prefix source: <http://aperture.semanticdesktop.org/ontology/2007/08/12/source#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix : <http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#> . 
+ +:DrupalCrawlerHandler a rdfs:Class ; + rdfs:label "Drupal Crawler Handler" ; + rdfs:comment "Configuration Format for the Drupal Crawler Handler class of the aperture-webserver" . + + +:configDomain a rdf:Property ; + rdfs:label "Domain" ; + rdfs:comment "Domain name of the Drupal Server." ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:string . + +:configApikey a rdf:Property ; + rdfs:label "API key" ; + rdfs:comment "XML-RPC API key of the Drupal Server." ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:string . + +:configXMLRPCServiceUrl a rdf:Property ; + rdfs:label "XML RPC Service URL" ; + rdfs:comment "Address of the XML RPC endpoint of the Drupal Server. Example: http://www.example.com/drupal/?q=services/xmlrpc" ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:string . + +:configUsername a rdf:Property ; + rdfs:label "username" ; + rdfs:comment "Username used to log into the Drupal Server and add content." ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:string . + +:configPassword a rdf:Property ; + rdfs:label "password" ; + rdfs:comment "Password used to log into the Drupal Server and add content. Stored as plaintext." ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:string . 
+ + \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/ApertureServer.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -122,6 +122,11 @@ * The URI identifying the named graph context in the modelset which is used to store the main server */ public static final URI APERTURESERVER_CONFIGCONTEXTURI = new URIImpl(APERTURESERVER_URIPREFIX+"configurationcontext"); + + /** + * The URI identifying the named graph context in the modelset which is used to store the crawler handler config + */ + public static final URI APERTURESERVER_CRAWLERHANDLERCONTEXTURI = new URIImpl(APERTURESERVER_URIPREFIX+"crawlerhandlercontext"); /** * Configuration URI for the crawler handler in the configuration RDF model. @@ -589,7 +594,8 @@ /** * Read the current configuration of the crawler handler. * The result is returned as RDF, in the serialization format chosen in the mimetype parameter. - * The configuration is stored in a separate named graph, identified as {@link #CRAWLERHANDLER_CONFIGURI}. + * The configuration is stored in a separate named graph. + * The resource used for configuration must be {@link #CRAWLERHANDLER_CONFIGURI}. * @param mimeType the mimetype to use for serialization * @return the configuration as string. 
The URI is {@link #CRAWLERHANDLER_CONFIGURI} ({@value #CRAWLERHANDLER_CONFIGURI}) */ Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/STATE.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/STATE.java 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/STATE.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -88,5 +88,19 @@ /** * DatwWraper has been started */ - STARTED + STARTED; + + /** + * Check if the passed STATE is a state of not crawling. Meaning that it is either IDLE, or started. + * @param state + * @return true, if the passed state is not crawling + */ + public static boolean notCrawling(STATE state) { + return (state==STARTED) + ||(state==DONE) + ||(state==DONE_WITH_ERRORS) + ||(state==STOPPED_WITH_ERRORS) + ||(state==IDLE) + ||(state==STOPPED); + } } \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-03-16 14:05:36 UTC (rev 2297) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -48,8 +48,11 @@ import java.util.logging.Level; import java.util.logging.Logger; +import junit.runner.ReloadingTestSuiteLoader; + import org.ontoware.aifbcommons.collection.ClosableIterator; import org.ontoware.rdf2go.ModelFactory; +import org.ontoware.rdf2go.RDF2Go; import org.ontoware.rdf2go.exception.ModelException; import org.ontoware.rdf2go.exception.ModelRuntimeException; import org.ontoware.rdf2go.model.Model; @@ -113,6 +116,11 @@ public static final String CFG_ADMINISTRATOR_PASSWORD = "administrator.password"; /** + * Filename of the datasource config file in the configuration directory + */ + public 
static final String SERVERCONFIG_FILENAME = "serverconfig.trig"; + + /** * Configuration parameters passed to the server. * They are partly read from a properties file. * @author sauermann @@ -215,6 +223,7 @@ /** * Actual model that stores the configuration data. + * Call {@link #saveConfigurationFile()} after changing it **/ private Model configurationModel; @@ -289,7 +298,7 @@ // Initialize the configuration model // using a file-backed config modelset { - configurationFile = new File(parameters.configurationDirectory, "dsconfig.trig"); + configurationFile = new File(parameters.configurationDirectory, SERVERCONFIG_FILENAME); configurationModelSet = factory.createModelSet(); configurationModelSet.open(); if (configurationFile.exists()) { @@ -318,7 +327,7 @@ // apertureServerCrawlerHandler = new ServerCrawlerHandler( // apertureRegistries, // this); - apertureServerCrawlerHandler = new DrupalCrawlerHandler(registries); + loadCrawlerHandler(); // initialize the data source collection try { @@ -353,7 +362,9 @@ log.exiting(ServerImpl.class.getName(),"init"); } - /** + + + /** * The configuration file was changed in the model, save it */ private synchronized void saveConfigurationFile() { @@ -1387,16 +1398,53 @@ } public String getCrawlerHandlerConfig(String mimeType) { - // TODO Auto-generated method stub - return null; + Model crawlerhandlerconfig = configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI); + String result = RDFTool.modelToString(crawlerhandlerconfig, Syntax.forMimeType(mimeType)); + return result; } public void setCrawlerHandlerConfig(String newRDFConfiguration, String mimeType) { - // TODO Auto-generated method stub + // Parse, that will cause most errors, if any + Model newModel = RDFTool.stringToModel(newRDFConfiguration, Syntax.forMimeType(mimeType)); + try { + // replace the existing rdf, that should work smoothly + Model crawlerhandlerconfig = configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI); + 
crawlerhandlerconfig.removeAll(); + crawlerhandlerconfig.addModel(newModel); + } finally { + newModel.close(); + } + saveConfigurationFile(); + reloadCrawlerHandler(); } + /** + * The crawler handler config was changed, reload the crawler handler + */ + private void reloadCrawlerHandler() { + stop(); + if (apertureServerCrawlerHandler != null) { + apertureServerCrawlerHandler.dispose(); + apertureServerCrawlerHandler = null; + } + loadCrawlerHandler(); + start(); + } + + /** + * Load the crawlerHandler + */ + private void loadCrawlerHandler() { + try { + apertureServerCrawlerHandler = new DrupalCrawlerHandler(this.apertureRegistries, + configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI)); + } catch (Exception x) { + log.log(Level.WARNING, "Cannot initialize Crawler Handler: "+x, x); + } + } + public DataSourceLog getDataSourceLog(String dataSourceUri) throws ApertureServerException { return getDataSourcePool().getDataSourceLog(dataSourceUri); Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ConfigRESTServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ConfigRESTServlet.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ConfigRESTServlet.java 2010-03-23 19:14:24 UTC (rev 2298) @@ -0,0 +1,127 @@ +package org.semanticdesktop.aperture.servlet; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.PrintWriter; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.io.IOUtils; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.util.RDFTool; +import org.semanticdesktop.aperture.server.ApertureServer; + +/** + * Configuration via REST. 
+ * + * The interface is similar to the constants defined in {@link ApertureServer} + * + * This intentionally does not use any REST frameworks, as they seem to add a lot of weight. + * <h3>Content negotiation</h3> + * This hack does only support plain and simple single accept headers. Go away with your fancy weights and stuff. Use this: + * <pre> +Accept: application/rdf+xml +</pre> + * + * <h3>Crawlerhandler</h3> + * Supports GET and PUT for the crawlerhandler. + * The address to GET/PUT is /config/api/crawlerhandler/ + * Servlet implementation class for Servlet: ConfigRESTServlet + * + */ + public class ConfigRESTServlet extends javax.servlet.http.HttpServlet implements javax.servlet.Servlet { + static final long serialVersionUID = 1L; + + /** + * REST path for crawlerhandler + */ + static final String CRAWLERHANDLER = "crawlerhandler"; + + /* (non-Java-doc) + * @see javax.servlet.http.HttpServlet#HttpServlet() + */ + public ConfigRESTServlet() { + super(); + } + + /* (non-Java-doc) + * @see javax.servlet.http.HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + // authorize + ApertureServerServlet.authenticateAdministrator(request, response, getServletContext()); + + // the part after /api that indicates what we need to do + String pathinfo = request.getPathInfo(); + if (("/"+CRAWLERHANDLER).equals(pathinfo)) { + // get Crawlerhandler config + Syntax syntax = Syntax.RdfXml; + String accept = request.getHeader("Accept"); + try { + syntax = Syntax.forMimeType(accept); + if (syntax == null) + { + syntax = Syntax.RdfXml; + log("Cannot find RDF Syntax for Accept-Header '"+accept+"'. 
Falling back to "+syntax.getMimeType()); + } + } catch (Exception x) { + log("Cannot parse Accept-Header '"+accept+"': "+x, x); + } + // return the config + String result = ApertureServerServlet.getApertureServer(getServletContext()).getCrawlerHandlerConfig(syntax.getMimeType()); + response.setContentType(syntax.getMimeType()); + PrintWriter writer = response.getWriter(); + writer.print(result); + } else { + throw new ServletException("Cannot answer "+pathinfo); + } + } + + /* (non-Java-doc) + * @see javax.servlet.http.HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response) + */ + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + } + + + @Override + protected void doPut(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + // authorize + if (!ApertureServerServlet.authenticateAdministrator(request, response, getServletContext())) + return; + + String pathinfo = request.getPathInfo(); + String url = request.getRequestURL().toString(); + System.out.println(url); + if (("/"+CRAWLERHANDLER).equals(pathinfo)) { + // PUT Crawlerhandler config + BufferedReader inputReader = request.getReader(); + String input = IOUtils.toString(inputReader); + // Syntax? + String contenttype = request.getContentType(); + Syntax syntax = Syntax.forMimeType(contenttype); + if (syntax == null) { + syntax = Syntax.RdfXml; + log("Content-Type unknown: "+contenttype+". Falling back to "+syntax.getMimeType()); + } + // set it. It will throw exceptions if something goes wrong + ApertureServerServlet.getApertureServer(getServletContext()). 
+ setCrawlerHandlerConfig(input, syntax.getMimeType()); + } else { + throw new ServletException("Cannot answer "+pathinfo); + } + } + + /* (non-Javadoc) + * @see javax.servlet.GenericServlet#init() + */ + public void init() throws ServletException { + // TODO Auto-generated method stub + super.init(); + } +} \ No newline at end of file Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ConfigRESTServlet.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-04-01 12:54:45
|
Revision: 2308 http://aperture.svn.sourceforge.net/aperture/?rev=2308&view=rev Author: leo_sauermann Date: 2010-04-01 12:54:37 +0000 (Thu, 01 Apr 2010) Log Message: ----------- Drupal integration: now using the content type externaldocument, and the Drupal XML RPC works. For aperture-webserver, I see we should separate crawling from storing the crawled dataobjects by putting a queue in between. Best would be a file-based queue. Modified Paths: -------------- aperture-webserver/trunk/README.txt aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java Added Paths: ----------- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/QueuingCrawlerHandler.java Modified: aperture-webserver/trunk/README.txt =================================================================== --- aperture-webserver/trunk/README.txt 2010-04-01 10:43:12 UTC (rev 2307) +++ aperture-webserver/trunk/README.txt 2010-04-01 12:54:37 UTC (rev 2308) @@ -10,6 +10,10 @@ Features ======== - extract plaintext and RDF from files +- Crawl datasources +- REST API for configuration of target system (into which the crawled dataobjects should be stored) +- Uploading crawled dataobjects into target systems. 
+ Currently supported: Drupal == Installation == - put the WAR into a Tomcat >= 5.5 Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-01 10:43:12 UTC (rev 2307) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-01 12:54:37 UTC (rev 2308) @@ -9,6 +9,7 @@ import org.ontoware.aifbcommons.collection.ClosableIterator; import org.ontoware.rdf2go.model.Model; import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.Syntax; import org.ontoware.rdf2go.model.node.Resource; import org.ontoware.rdf2go.model.node.Variable; import org.ontoware.rdf2go.util.RDFTool; @@ -165,7 +166,6 @@ } catch (Exception e) { logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); } - // push to drupal try { // connect @@ -173,11 +173,7 @@ connect(); // convert - RDFContainer data = object.getMetadata(); - DrupalNode node = new DrupalNode(); - node.setType(DrupalNode.TYPE_STORY); - node.setTitle(data.getString(NIE.title)); - node.setBody(data.getString(NIE.plainTextContent)); + DrupalNode node = objectToNode(object); service.nodeSave(node); } catch (Exception x) { logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); @@ -186,6 +182,27 @@ super.objectChanged(crawler, object); } + private DrupalNode objectToNode(DataObject object) { + RDFContainer data = object.getMetadata(); + DrupalNode node = new DrupalNode(); + node.setType("externaldocument"); // required by aperture to store URL and datasource + // do "not-null" checks + String title = data.getString(NIE.title); + if (title == null) + title = object.getID().toString(); + node.setTitle(title); + String body = data.getString(NIE.plainTextContent); + if (body == null) + body = " "; + 
node.setBody(body); + node.put("URL", object.getID().toString()); + node.put("datasource", object.getDataSource().getID().toString()); + // rdf-ify it + String rdfAsString = RDFTool.modelToString(data.getModel(), Syntax.RdfXml); + node.put("rdfdescription", rdfAsString); + return node; + } + @Override public void objectNew(Crawler crawler, DataObject object) { try { @@ -202,16 +219,7 @@ // convert RDFContainer data = object.getMetadata(); - DrupalNode node = new DrupalNode(); - node.setType(DrupalNode.TYPE_STORY); - String title = data.getString(NIE.title); - if (title == null) - title = object.getID().toString(); - node.setTitle(title); - String body = data.getString(NIE.plainTextContent); - if (body == null) - body = " "; - node.setBody(body); + DrupalNode node = objectToNode(object); service.nodeSave(node); } catch (Exception x) { logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-01 10:43:12 UTC (rev 2307) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-01 12:54:37 UTC (rev 2308) @@ -1,25 +1,16 @@ package org.semanticdesktop.aperture.drupalhandler; - - import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.Charset; -import java.security.InvalidKeyException; -import java.security.NoSuchAlgorithmException; -import java.util.HashMap; import java.util.Map; import java.util.Vector; import java.util.logging.Level; import java.util.logging.Logger; - import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; - import org.apache.xmlrpc.client.XmlRpcClient; import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; -import com.sun.net.ssl.internal.ssl.Debug; - /** 
* XML-RPC client for the server offered by Drupal. * Drupal has a weird authentication scheme on the server side involving to sign every method @@ -42,6 +33,9 @@ * * This class is written by Leo Sauermann on the basis of work published by Aaron Moline. * + * It is currently part of the Aperture sourceforge project which is BSD licensed, + * if you want to put it elsewhere, do so under this license. + * * @author Aaron Moline <Aar...@mo...> * @author Leo Sauermann <leo...@df...> */ @@ -110,10 +104,8 @@ * //TODO:Get None Generator Working String allowedCharacters = * "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; * StringBuilder password = new StringBuilder(); - * * Random rand = new Random(); for (int i = 0; i < length; i++) { * password.append() //password.append(append); } - * * return password.toString(); */ return "" + System.currentTimeMillis(); Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/QueuingCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/QueuingCrawlerHandler.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/QueuingCrawlerHandler.java 2010-04-01 12:54:37 UTC (rev 2308) @@ -0,0 +1,157 @@ +/** + * + */ +package org.semanticdesktop.aperture.server.impl; + +import java.util.Date; +import java.util.Set; + +import org.ontoware.aifbcommons.collection.ClosableIterator; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.Variable; +import org.ontoware.rdf2go.util.RDFTool; +import org.ontoware.rdf2go.vocabulary.RDF; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.crawler.Crawler; +import org.semanticdesktop.aperture.crawler.ExitCode; +import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; 
+import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierFactory; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierRegistry; +import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.server.impl.ApertureRegistriesAggregate; +import org.semanticdesktop.aperture.vocabulary.NIE; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A crawler handler that extracts the contents from the crawled dataobjects + * and stores the dataobject as RDF/XML serialization into a file in a configured + * folder. + * + * The queue idea is that crawling is separated from pushing the crawled dataobjects + * into the target system. + * + * TODO implement this. + * + * @author sauermann + * + */ +public class QueuingCrawlerHandler extends CrawlerHandlerBase { + + private static Logger logger = LoggerFactory.getLogger(QueuingCrawlerHandler.class); + + + /** + * Create the crawlerhandler and read the configuration. + * Do not access the configmodel after the constructor is finished, rather copy everything you need from the model. + * @param registries the registries to use + * @param configModel the configmodel with settings. 
Do not access it after the constructor is finished + */ + public QueuingCrawlerHandler(ApertureRegistriesAggregate registries, Model configModel) throws Exception { + super( + // TODO - use something cleverer here + new MagicMimeTypeIdentifier(), + registries.getExtractorRegistry(), null); + readConfiguration(configModel); + } + + /** + * during construction: read the config + * @param configModel + */ + private void readConfiguration(Model configModel) throws Exception { + setExtractingContents(true); + } + + + private static MimeTypeIdentifier getMimeTypeIdentifier( + ApertureRegistriesAggregate apertureRegistries) { + MimeTypeIdentifierRegistry registry + = apertureRegistries.getMimeTypeIdentifierRegistry(); + Set set = registry.getAll(); + if (!set.isEmpty()) { + MimeTypeIdentifierFactory factory + = (MimeTypeIdentifierFactory) set.iterator().next(); + return factory.get(); + } else { + logger.warn("No mime type identifier found. " + + "No full-text extraction possible"); + return new MagicMimeTypeIdentifier(); + } + } + + public void dispose() { + } + + @Override + public void objectChanged(Crawler crawler, DataObject object) { + try { + super.processBinary(crawler, object); + } catch (Exception e) { + logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + } + + // TODO: Queue + + super.objectChanged(crawler, object); + } + + @Override + public void objectNew(Crawler crawler, DataObject object) { + try { + super.processBinary(crawler, object); + } catch (Exception e) { + logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + } + + // TODO: Queue + + super.objectNew(crawler, object); + } + + + @Override + public void objectRemoved(Crawler crawler, String url) { + + // TODO: Queue + + super.objectRemoved(crawler, url); + } + + @Override + public void clearFinished(Crawler crawler, ExitCode exitCode) { + // TODO Auto-generated method stub + super.clearFinished(crawler, exitCode); + } + + @Override + public void 
clearingObject(Crawler crawler, String url) { + // TODO Auto-generated method stub + super.clearingObject(crawler, url); + } + + @Override + public void clearStarted(Crawler crawler) { + // TODO Auto-generated method stub + super.clearStarted(crawler); + } + + @Override + public void crawlStarted(Crawler crawler) { + // TODO Auto-generated method stub + super.crawlStarted(crawler); + } + + @Override + public void crawlStopped(Crawler crawler, ExitCode exitCode) { + // TODO Auto-generated method stub + super.crawlStopped(crawler, exitCode); + } + + + +} Property changes on: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/QueuingCrawlerHandler.java ___________________________________________________________________ Added: svn:keywords + "LastChangedDate LastChangedRevision URL" Added: svn:eol-style + native This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-04-18 17:03:05
|
Revision: 2323 http://aperture.svn.sourceforge.net/aperture/?rev=2323&view=rev Author: leo_sauermann Date: 2010-04-18 17:02:56 +0000 (Sun, 18 Apr 2010) Log Message: ----------- fixed wrong content-encoding in FileInspector, made data-source list a bit nicer, fixed XmlSafe in DrupalCrawlerHandler Modified Paths: -------------- aperture-webserver/trunk/WebContent/config/index.jsp aperture-webserver/trunk/WebContent/config/sourcedetails.jsp aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java Added Paths: ----------- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java Modified: aperture-webserver/trunk/WebContent/config/index.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/index.jsp 2010-04-15 14:23:34 UTC (rev 2322) +++ aperture-webserver/trunk/WebContent/config/index.jsp 2010-04-18 17:02:56 UTC (rev 2323) @@ -49,8 +49,7 @@ <h2>Currenly configured data sources.</h2> <table> <tr class="tableheader"> - <td>Name</td> - <td>Type</td> + <td>Name, Type</td> <td>Status</td> <td>Last crawled</td> <td>Next crawl</td> @@ -58,29 +57,12 @@ </tr> <c:forEach items="${sourcesList}" var="source"> <tr> - <td>${source.name}</td> - <td>${source.typeLabel}</td> + <td><b><a href="sourcedetails.jsp?uri=${source.dataSourceURIEscaped}">${source.name}</a></b><br/>${source.typeLabel}</td> <aperture:statuscell state="${source.crawlingState}" /> <td>${source.lastCrawledString}</td> <td>${source.nextCrawlString}</td> <td> <aperture:crawlbutton bean="${source}"/> - </td> - <td> - <form name="sourceDetailsForm" - action="sourcedetails.jsp" - method="get"> - <input type="hidden" - name="uri" - value="${source.dataSourceURIEscaped}"> - <button name="crawl" 
- type="submit" - value="details"> - Show Details - </button> - </form> - </td> - <td> <form name="deleteSourceForm" action="deleteSource" method="post"> Modified: aperture-webserver/trunk/WebContent/config/sourcedetails.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-04-15 14:23:34 UTC (rev 2322) +++ aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-04-18 17:02:56 UTC (rev 2323) @@ -57,7 +57,22 @@ <%= fresnelEditor.createSpecificFormPart() %> - <h2><aperture:crawlbutton bean="${bean}"/></h2> + <!-- Actions --> + <p> + <aperture:crawlbutton bean="${bean}"/> + <form name="deleteSourceForm" + action="deleteSource" + method="post"> + <input type="hidden" + name="uri" + value="${source.dataSourceURIEscaped}"> + <button name="crawl" + type="submit" + value="delete"> + Delete + </button> + </form> + </p> <%if (bean.getLastErrorMessage() != null) {%> <h2>Error message</h2> Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-15 14:23:34 UTC (rev 2322) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-18 17:02:56 UTC (rev 2323) @@ -3,7 +3,7 @@ */ package org.semanticdesktop.aperture.drupalhandler; -import java.util.Date; +import java.util.Map; import java.util.Set; import org.ontoware.aifbcommons.collection.ClosableIterator; @@ -11,10 +11,13 @@ import org.ontoware.rdf2go.model.Statement; import org.ontoware.rdf2go.model.Syntax; import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.Variable; import org.ontoware.rdf2go.util.RDFTool; import org.ontoware.rdf2go.vocabulary.RDF; import 
org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.RDFContainerFactory; +import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; import org.semanticdesktop.aperture.crawler.Crawler; import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; @@ -22,6 +25,8 @@ import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierRegistry; import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.rdf.util.XmlSafetyUtils; import org.semanticdesktop.aperture.server.impl.ApertureRegistriesAggregate; import org.semanticdesktop.aperture.vocabulary.NIE; import org.slf4j.Logger; @@ -67,7 +72,36 @@ * The service, it is initialized in connect() and closed in dispose() */ DrupalXmlRpcService service; + + /** + * one RDF container factory, can be reused as it creates in-memory rdf containers. + */ + private RDFContainerFactory rdfContainerFactory; + /** + * This class exists because this fuckup exists: + * https://sourceforge.net/tracker/index.php?func=detail&aid=2989040&group_id=150969&atid=779503 + * Once this ticket is closed, I guess this class can be removed! + * @author leo.sauermann + * + */ + class XmlSafeRDFContainerFactory extends RDFContainerFactoryImpl { + + @Override + public RDFContainerImpl getRDFContainer(URI uri) { + return newInstance(uri); + } + + @Override + public RDFContainerImpl newInstance(String uri) { + return (RDFContainerImpl)XmlSafetyUtils.wrapXmlSafeRDFContainer(super.newInstance(uri)); + } + + @Override + public RDFContainerImpl newInstance(URI uri) { + return (RDFContainerImpl)XmlSafetyUtils.wrapXmlSafeRDFContainer(super.newInstance(uri)); + } + } /** * Create the crawlerhandler and read the configuration. 
@@ -80,7 +114,6 @@ // TODO - use something cleverer here new MagicMimeTypeIdentifier(), registries.getExtractorRegistry(), null); - readConfiguration(configModel); } @@ -112,24 +145,7 @@ } - private static MimeTypeIdentifier getMimeTypeIdentifier( - ApertureRegistriesAggregate apertureRegistries) { - MimeTypeIdentifierRegistry registry - = apertureRegistries.getMimeTypeIdentifierRegistry(); - Set set = registry.getAll(); - if (!set.isEmpty()) { - MimeTypeIdentifierFactory factory - = (MimeTypeIdentifierFactory) set.iterator().next(); - return factory.get(); - } else { - logger.warn("No mime type identifier found. " + - "No full-text extraction possible"); - return new MagicMimeTypeIdentifier(); - } - } - - public DrupalCrawlerHandler(String domain, String apikey, String serviceUrl, String username, String password) { super(); @@ -158,6 +174,20 @@ service = null; } } + + /** + * Connections to Drupal are done via XML-RPC, so we need XmlSafeRDFContainers. + */ + @Override + public RDFContainerFactory getRDFContainerFactory(Crawler crawler, + String url) { + // we can reuse the same container factory. 
+ if (rdfContainerFactory == null) + { + rdfContainerFactory = new XmlSafeRDFContainerFactory(); + } + return rdfContainerFactory; + } @Override public void objectChanged(Crawler crawler, DataObject object) { @@ -182,7 +212,7 @@ super.objectChanged(crawler, object); } - private DrupalNode objectToNode(DataObject object) { + protected static DrupalNode objectToNode(DataObject object) { RDFContainer data = object.getMetadata(); DrupalNode node = new DrupalNode(); node.setType("externaldocument"); // required by aperture to store URL and datasource @@ -199,6 +229,12 @@ node.put("datasource", object.getDataSource().getID().toString()); // rdf-ify it String rdfAsString = RDFTool.modelToString(data.getModel(), Syntax.RdfXml); + // stupidity check + for (Map.Entry<String, Object> entry : node.entrySet()) + { + if (entry.getValue() == null) + logger.error("key "+entry.getKey()+" for entry "+object.getID().toString()+" has a <null> value."); + } node.put("rdfdescription", rdfAsString); return node; } Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java 2010-04-18 17:02:56 UTC (rev 2323) @@ -0,0 +1,106 @@ +package org.semanticdesktop.aperture.drupalhandler; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.base.DataObjectBase; +import 
org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.datasource.filesystem.FileSystemDataSource; +import org.semanticdesktop.aperture.extractor.word.WordExtractor; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.rdf.util.XmlSafetyUtils; + +/** + * This is an example class showing how Aperture and Drupal may work together to upload files + * @author leo.sauermann + * + */ +public class DrupalCrawlerUploadExample { + + /** + * @param args + */ + public static void main(String[] args) throws Exception { + DataObject o=null; + try { + + // connect to drupal + DrupalXmlRpcService drupal = new DrupalXmlRpcService("localhost", + "dac5b06a2e63eed9336ce24f5e56a181", + "http://localhost/organikdrupal/?q=services/xmlrpc"); + drupal.connect(); + drupal.login("root", "root"); + try { + o = extractDataObject(); + DrupalNode node = DrupalCrawlerHandler.objectToNode(o); + drupal.nodeSave(node); + } finally { + drupal.logout(); + } + } finally { + if (o!=null) + o.dispose(); + } + System.out.println("done"); + } + + public static DataObject extractDataObject() throws Exception { + System.out.print("Extracting Data Object ... 
"); + File f = new File("C:/1/test.doc"); + URI uri = new URIImpl( + f.toURI().toASCIIString()); + WordExtractor extractor = new WordExtractor(); + + + Model m = RDF2Go.getModelFactory().createModel(); + m.open(); + m = XmlSafetyUtils.wrapXmlSafeModel(m); + RDFContainerImpl rdfcontainer = new RDFContainerImpl(m, uri); + + InputStream in = new FileInputStream(f); + try { + extractor.extract(uri, in, null, null, rdfcontainer); + } finally { + in.close(); + } + + DataSource ds = new FileSystemDataSource(); + ds + .setConfiguration(new RDFContainerImpl( + RDF2Go.getModelFactory().createModel().open(), + new URIImpl( + "urn:aperture:server:datasource:46def7e5-2344-4bc9-b36f-01463e0d846e"))); + DataObject o = new DataObjectBase(uri, ds, rdfcontainer); + System.out.println("done."); + return o; + } + + + public static DataObject loadDataObject() throws Exception { + System.out.print("Loading Data Object ... "); + Model m = RDF2Go.getModelFactory().createModel(); + m.open(); + m.readFrom(new FileInputStream("C:\\1\\text.xml"), Syntax.RdfXml); + URI uri = new URIImpl( + "file:/C:/1/test.doc"); + RDFContainerImpl rdfcontainer = new RDFContainerImpl(m, uri); + + DataSource ds = new FileSystemDataSource(); + ds + .setConfiguration(new RDFContainerImpl( + RDF2Go.getModelFactory().createModel().open(), + new URIImpl( + "urn:aperture:server:datasource:46def7e5-2344-4bc9-b36f-01463e0d846e"))); + DataObject o = new DataObjectBase(uri, ds, rdfcontainer); + System.out.println("done."); + return o; + } + +} Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-15 14:23:34 UTC (rev 2322) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-18 17:02:56 UTC (rev 2323) @@ -10,6 +10,7 @@ import 
javax.crypto.spec.SecretKeySpec; import org.apache.xmlrpc.client.XmlRpcClient; import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.apache.xmlrpc.common.XmlRpcExtensionException; /** * XML-RPC client for the server offered by Drupal. @@ -90,6 +91,7 @@ // initialize the xmlRpcClient, it won't change as the parameters are final XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); config.setServerURL(new URL(this.serviceURL)); + config.setEncoding("UTF-8"); //Leo Sauermann: experimental, I had problems with umlauts. xmlRpcClient = new XmlRpcClient(); xmlRpcClient.setConfig(config); } @@ -236,7 +238,19 @@ public void nodeSave(DrupalNode node) throws Exception { Vector<Object> params = generateDefaultParams(MethodNodeSave); params.add(node); - Object o = xmlRpcClient.execute(MethodNodeSave, params); + Object o; + try { + o = xmlRpcClient.execute(MethodNodeSave, params); + } catch (XmlRpcExtensionException x) { + // was it a null value? + // stupidity check + for (Map.Entry<String, Object> entry : node.entrySet()) + { + if (entry.getValue() == null) + log.severe("key "+entry.getKey()+" has a <null> value."); + } + throw x; + } if (log.isLoggable(Level.FINEST)) log.finest(MethodNodeSave+" returned "+o.toString()); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java 2010-04-15 14:23:34 UTC (rev 2322) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/FileInspector.java 2010-04-18 17:02:56 UTC (rev 2323) @@ -44,266 +44,261 @@ import org.semanticdesktop.aperture.vocabulary.NIE; /** - * Servlet implementation class for Servlet: FileInspector - * author: Benjamin Horak + * Servlet implementation class for Servlet: FileInspector + * The character encoding is by default UTF-8. 
+ * + * @author Benjamin Horak + * @author Leo Sauermann */ -public class FileInspector extends javax.servlet.http.HttpServlet implements javax.servlet.Servlet { - /* - * (non-Java-doc) - * - * @see javax.servlet.http.HttpServlet#HttpServlet() - */ - public FileInspector() { - super(); - } - - public String getMimeType(String url) throws Exception { - HttpClient client = new HttpClient(); - GetMethod get = new GetMethod(url); - get.setFollowRedirects(true); - int httpResult = client.executeMethod(get); - if (httpResult == 200) { - return getMimeType(get.getResponseBodyAsStream(), url); +public class FileInspector extends javax.servlet.http.HttpServlet implements + javax.servlet.Servlet { + /* + * (non-Java-doc) + * + * @see javax.servlet.http.HttpServlet#HttpServlet() + */ + public FileInspector() { + super(); } - throw new HttpException("Invalid result: HTTP code is " + httpResult); - } - public List<String> extractHyperlinks(String url) throws Exception { - HttpClient client = new HttpClient(); - GetMethod get = new GetMethod(url); - get.setFollowRedirects(true); - int httpResult = client.executeMethod(get); - if (httpResult == 200) { - return extractHyperlinks(get.getResponseBodyAsStream()); + public String getMimeType(String url) throws Exception { + HttpClient client = new HttpClient(); + GetMethod get = new GetMethod(url); + get.setFollowRedirects(true); + int httpResult = client.executeMethod(get); + if (httpResult == 200) { + return getMimeType(get.getResponseBodyAsStream(), url); + } + throw new HttpException("Invalid result: HTTP code is " + httpResult); } - throw new HttpException("Invalid result: HTTP code is " + httpResult); - } - public String inspectFile(String url, String mimeType, List<String> links) throws Exception { - HttpClient client = new HttpClient(); - GetMethod get = new GetMethod(url); - get.setFollowRedirects(true); - int httpResult = client.executeMethod(get); - if (httpResult == 200) { - return 
inspectFile(get.getResponseBodyAsStream(), url, mimeType, links); + public List<String> extractHyperlinks(String url) throws Exception { + HttpClient client = new HttpClient(); + GetMethod get = new GetMethod(url); + get.setFollowRedirects(true); + int httpResult = client.executeMethod(get); + if (httpResult == 200) { + return extractHyperlinks(get.getResponseBodyAsStream()); + } + throw new HttpException("Invalid result: HTTP code is " + httpResult); } - throw new HttpException("Invalid result: HTTP code is " + httpResult); - } - @Override - protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - String mimeType = null; - String urls = request.getParameter("url"); - List<String> links = new ArrayList<String>(); - String rdf; - try { - if (urls != null) { - urls = urls.replace(" ", "%20"); - mimeType = getMimeType(urls); - - if (mimeType.equals("text/html")) { - links = extractHyperlinks(urls); + public RDFContainer inspectFile(String url, String mimeType, List<String> links) + throws Exception { + HttpClient client = new HttpClient(); + GetMethod get = new GetMethod(url); + get.setFollowRedirects(true); + int httpResult = client.executeMethod(get); + if (httpResult == 200) { + return inspectFile(get.getResponseBodyAsStream(), url, mimeType, + links); } - - rdf = inspectFile(urls, mimeType, links); - response.getOutputStream().print(rdf); - return; - } - } catch (Exception e) { - throw new ServletException(e); + throw new HttpException("Invalid result: HTTP code is " + httpResult); } - } - /* - * (non-Java-doc) - * - * @see javax.servlet.http.HttpServlet#doPost(HttpServletRequest - * request, HttpServletResponse response) - */ - protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, - IOException { - String rdf; - List<String> links = new ArrayList<String>(); + @Override + protected void doGet(HttpServletRequest request, + HttpServletResponse response) 
throws ServletException, IOException { + String mimeType = null; + String urls = request.getParameter("url"); + List<String> links = new ArrayList<String>(); + try { + if (urls != null) { + urls = urls.replace(" ", "%20"); + mimeType = getMimeType(urls); - try { - String mimeType = null; - String urls = request.getParameter("url"); - if (urls != null) { - urls = urls.replace(" ", "%20"); - mimeType = getMimeType(urls); + if (mimeType.equals("text/html")) { + links = extractHyperlinks(urls); + } - if (mimeType != null && mimeType.equals("text/html")) { - links = extractHyperlinks(urls); + RDFContainer rdf = inspectFile(urls, mimeType, links); + // Print the RDF as UTF-8. Note that Model.writeTo() is hardcoded to write UTF-8 + response.setCharacterEncoding("UTF-8"); + rdf.getModel().writeTo(response.getOutputStream(), Syntax.RdfXml); + return; + } + } catch (Exception e) { + throw new ServletException(e); } + } - rdf = inspectFile(urls, mimeType, links); - response.getOutputStream().print(rdf); - return; - // } else { - // throw new ServletException(get.getResponseBodyAsString()); - // } - } + /* + * (non-Java-doc) + * + * @see javax.servlet.http.HttpServlet#doPost(HttpServletRequest request, + * HttpServletResponse response) + */ + protected void doPost(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + List<String> links = new ArrayList<String>(); - // Create a factory for disk-based file items - FileItemFactory factory = new DiskFileItemFactory(); + try { + String mimeType = null; + String urls = request.getParameter("url"); + if (urls != null) { + urls = urls.replace(" ", "%20"); + mimeType = getMimeType(urls); - // Create a new file upload handler - ServletFileUpload upload = new ServletFileUpload(factory); + if (mimeType != null && mimeType.equals("text/html")) { + links = extractHyperlinks(urls); + } - // Parse the request - List<FileItem> /* FileItem */items = upload.parseRequest(request); + RDFContainer rdf = 
inspectFile(urls, mimeType, links); + // Print the RDF as UTF-8. Note that Model.writeTo() is hardcoded to write UTF-8 + response.setCharacterEncoding("UTF-8"); + rdf.getModel().writeTo(response.getOutputStream(), Syntax.RdfXml); + return; + // } else { + // throw new ServletException(get.getResponseBodyAsString()); + // } + } - for (FileItem fileItem : items) { - File uploadedFile = File.createTempFile(fileItem.getName(), ".tmp"); - fileItem.write(uploadedFile); - mimeType = getMimeType(new FileInputStream(uploadedFile), uploadedFile.toURI().toString()); - if (mimeType != null && mimeType.equals("text/html")) { - links = extractHyperlinks(new FileInputStream(uploadedFile)); - } - rdf = inspectFile(new FileInputStream(uploadedFile), uploadedFile.toURI().toString(), mimeType, links); - uploadedFile.delete(); - response.getOutputStream().print(rdf); - return; - } + // Create a factory for disk-based file items + FileItemFactory factory = new DiskFileItemFactory(); - } catch (Exception e) { - e.printStackTrace(); - throw new ServletException(e); - } - } + // Create a new file upload handler + ServletFileUpload upload = new ServletFileUpload(factory); - private String getMimeType(InputStream stream, String path) throws IOException { - MimeTypeIdentifier identifier = new MagicMimeTypeIdentifier(); - int minimumArrayLength = identifier.getMinArrayLength(); - int bufferSize = Math.max(minimumArrayLength, 8192); - BufferedInputStream buffer = new BufferedInputStream(stream, bufferSize); - buffer.mark(minimumArrayLength + 10); // add some for safety - byte[] bytes = IOUtil.readBytes(buffer, minimumArrayLength); + // Parse the request + List<FileItem> /* FileItem */items = upload.parseRequest(request); - // let the MimeTypeIdentifier determine the MIME type of this file - String mimeType = identifier.identify(bytes, path, null); + for (FileItem fileItem : items) { + File uploadedFile = File.createTempFile(fileItem.getName(), + ".tmp"); + fileItem.write(uploadedFile); + 
mimeType = getMimeType(new FileInputStream(uploadedFile), + uploadedFile.toURI().toString()); + if (mimeType != null && mimeType.equals("text/html")) { + links = extractHyperlinks(new FileInputStream(uploadedFile)); + } + RDFContainer rdf = inspectFile(new FileInputStream(uploadedFile), uploadedFile.toURI().toString(), mimeType, links); + uploadedFile.delete(); + // Print the RDF as UTF-8. Note that Model.writeTo() is hardcoded to write UTF-8 + response.setCharacterEncoding("UTF-8"); + rdf.getModel().writeTo(response.getOutputStream(), Syntax.RdfXml); + return; + } - if(mimeType == null) { - throw new IOException("Could not identify mimetype of: " + path +". Therefore document normalization is not possible."); + } catch (Exception e) { + e.printStackTrace(); + throw new ServletException(e); + } } - - return mimeType; - } - private List<String> extractHyperlinks(InputStream stream) throws Exception { - BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); + private String getMimeType(InputStream stream, String path) + throws IOException { + MimeTypeIdentifier identifier = new MagicMimeTypeIdentifier(); + int minimumArrayLength = identifier.getMinArrayLength(); + int bufferSize = Math.max(minimumArrayLength, 8192); + BufferedInputStream buffer = new BufferedInputStream(stream, bufferSize); + buffer.mark(minimumArrayLength + 10); // add some for safety + byte[] bytes = IOUtil.readBytes(buffer, minimumArrayLength); - ArrayList<String> list = new ArrayList<String>(); + // let the MimeTypeIdentifier determine the MIME type of this file + String mimeType = identifier.identify(bytes, path, null); - boolean tag = false; - boolean link = false; - boolean label = false; - boolean a = true; - - StringBuffer buffer = new StringBuffer(); - - for (String line = ""; line != null; line = reader.readLine()) { - char[] cline = line.toCharArray(); - for (char c : cline) { - if (!label && !link && c == '<') { - tag = true; - } else if (!label && !link && tag && c 
== 'a') { - a = true; - } else if (!label && !link && tag && a && c == ' ') { - link = true; - } else if (!label && link && c == '>') { - label = true; - } else if (label && c != '<') { - if (c == ' ' || (c >= 65 && c <= 90) || (c >= 97 && c <= 122)) { - buffer.append(c); - } - } else if (label && c == '<') { - if (buffer.length() > 0) { - String s = buffer.toString().trim().toLowerCase(); - if (s.length() > 0 && s.length() <= 30) { - if (!s.startsWith("http") && !s.startsWith("www") && !s.startsWith("ftp") - && !s.startsWith("mailto")) { - list.add(s); - } - } - } - buffer.setLength(0); - a = false; - tag = false; - link = false; - label = false; + if (mimeType == null) { + throw new IOException("Could not identify mimetype of: " + path + + ". Therefore document normalization is not possible."); } - if (c == '>') { - tag = false; - a = false; - } - } + return mimeType; } - return list; + private List<String> extractHyperlinks(InputStream stream) throws Exception { + BufferedReader reader = new BufferedReader( + new InputStreamReader(stream)); - } + ArrayList<String> list = new ArrayList<String>(); - private String inspectFile(InputStream stream, String path, String mimeType, List<String> hyperlinks) - throws Exception { + boolean tag = false; + boolean link = false; + boolean label = false; + boolean a = true; - StringWriter stringWriter = new StringWriter(); - // create a MimeTypeIdentifier + StringBuffer buffer = new StringBuffer(); - // create an ExtractorRegistry containing all Extractors - ExtractorRegistry extractorRegistry = new DefaultExtractorRegistry(); + for (String line = ""; line != null; line = reader.readLine()) { + char[] cline = line.toCharArray(); + for (char c : cline) { + if (!label && !link && c == '<') { + tag = true; + } else if (!label && !link && tag && c == 'a') { + a = true; + } else if (!label && !link && tag && a && c == ' ') { + link = true; + } else if (!label && link && c == '>') { + label = true; + } else if (label && c != '<') { + 
if (c == ' ' || (c >= 65 && c <= 90) + || (c >= 97 && c <= 122)) { + buffer.append(c); + } + } else if (label && c == '<') { + if (buffer.length() > 0) { + String s = buffer.toString().trim().toLowerCase(); + if (s.length() > 0 && s.length() <= 30) { + if (!s.startsWith("http") && !s.startsWith("www") + && !s.startsWith("ftp") + && !s.startsWith("mailto")) { + list.add(s); + } + } + } + buffer.setLength(0); + a = false; + tag = false; + link = false; + label = false; + } - BufferedInputStream buffer = new BufferedInputStream(stream); + if (c == '>') { + tag = false; + a = false; + } + } + } - // skip the extraction phase when the MIME type could not be determined - if (mimeType == null) { - System.err.println("WARNING: MIME type could not be established."); - } else { + return list; - // create the RDFContainer that will hold the RDF model - RDFContainerFactoryImpl containerFactory = new RDFContainerFactoryImpl(); - RDFContainer container = containerFactory.newInstance(path); + } - // determine and apply an Extractor that can handle this MIME - Set factories = extractorRegistry.get(mimeType); - if (factories != null && !factories.isEmpty()) { - // just fetch the first available Extractor - ExtractorFactory factory = (ExtractorFactory) factories.iterator().next(); - Extractor extractor = factory.get(); + private RDFContainer inspectFile(InputStream stream, String path, + String mimeType, List<String> hyperlinks) throws Exception { - // apply the extractor on the specified file - extractor.extract(container.getDescribedUri(), buffer, null, mimeType, container); - } - // add the MIME type as an additional statement to the RDF model - container.add(NIE.mimeType, mimeType); - for (String link : hyperlinks) { - container.add(NIE.keyword, link); - } - // report the output to System.out - container.getModel().writeTo(stringWriter, Syntax.RdfXml); - container.dispose(); - } - buffer.close(); + StringWriter stringWriter = new StringWriter(); + // create a MimeTypeIdentifier 
- String out = stringWriter.toString(); + // create an ExtractorRegistry containing all Extractors + ExtractorRegistry extractorRegistry = new DefaultExtractorRegistry(); - if (out == null || out.length() == 0) { - throw new Exception("Invalid content for: " + path); - } + BufferedInputStream buffer = new BufferedInputStream(stream); - out = out.replace("
", ""); - String patternStr = "\\s+"; - String replaceStr = " "; - Pattern pattern = Pattern.compile(patternStr); - Matcher matcher = pattern.matcher(out); - out = matcher.replaceAll(replaceStr); + // skip the extraction phase when the MIME type could not be determined + if (mimeType == null) + throw new Exception("MIME Type can not be extracted"); - patternStr = "[^\\w\\p{Punct}\\s\xE4\xFC\xF6\xC4\xDC\xD6\xDF]"; - replaceStr = ""; - pattern = Pattern.compile(patternStr); - matcher = pattern.matcher(out); - out = matcher.replaceAll(replaceStr); - return out; - } + // create the RDFContainer that will hold the RDF model + RDFContainerFactoryImpl containerFactory = new RDFContainerFactoryImpl(); + RDFContainer container = containerFactory.newInstance(path); + + // determine and apply an Extractor that can handle this MIME + Set factories = extractorRegistry.get(mimeType); + if (factories != null && !factories.isEmpty()) { + // just fetch the first available Extractor + ExtractorFactory factory = (ExtractorFactory) factories + .iterator().next(); + Extractor extractor = factory.get(); + + // apply the extractor on the specified file + extractor.extract(container.getDescribedUri(), buffer, null, + mimeType, container); + } + // add the MIME type as an additional statement to the RDF model + container.add(NIE.mimeType, mimeType); + for (String link : hyperlinks) { + container.add(NIE.keyword, link); + } + return container; + } } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-04-18 20:35:54
|
Revision: 2324 http://aperture.svn.sourceforge.net/aperture/?rev=2324&view=rev Author: leo_sauermann Date: 2010-04-18 20:35:48 +0000 (Sun, 18 Apr 2010) Log Message: ----------- Aperture-webserver: * better debugging and logging for drupal * implemented objectchanged and objectremoved using Drupal externaldocument.update and externaldocument.delete Modified Paths: -------------- aperture-webserver/trunk/WebContent/config/datasourcereport.jsp aperture-webserver/trunk/WebContent/config/sourcedetails.jsp aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java Modified: aperture-webserver/trunk/WebContent/config/datasourcereport.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/datasourcereport.jsp 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/WebContent/config/datasourcereport.jsp 2010-04-18 20:35:48 UTC (rev 2324) @@ -125,7 +125,7 @@ Date logdate = new Date(log.getMillis()); request.setAttribute("logdate", dateformat.format(logdate)); %> - <tr><td>${log.level}</td><td>${logdate}</td><td>${log.message}</td></tr> + <tr><td valign="top">${log.level}</td><td valign="top">${logdate}</td><td>${log.message}</td></tr> <% } %> Modified: aperture-webserver/trunk/WebContent/config/sourcedetails.jsp =================================================================== --- aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/WebContent/config/sourcedetails.jsp 2010-04-18 20:35:48 UTC (rev 2324) @@ -44,6 +44,35 @@ 
</center> <% } else { %> <h1>Details of '${bean.name}' data source</h1> + <table > + <TR> + <TD class="rowheader">Crawl started:</TD> + <td class="configinput">${bean.crawlStartedString}</td> + </TR> + <TR> + <TD class="rowheader">Crawl stopped:</TD> + <td class="configinput">${bean.crawlStoppedString}</td> + </TR> + <TR> + <TD class="rowheader">New objects:</TD> + <td class="configinput">${bean.newObjects}</td> + </TR> + <TR> + <TD class="rowheader">Changed objects:</TD> + <td class="configinput">${bean.modifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Unchanged objects:</TD> + <td class="configinput">${bean.unmodifiedObjects}</td> + </TR> + <TR> + <TD class="rowheader">Removed objects:</TD> + <td class="configinput">${bean.deletedObjects}</td> + </TR> + <TR><TD> </TD><TD colspan="2"><a href="datasourcereport.jsp?uri=${bean.dataSourceURI}"><b>Detailed report...<b></a></TD></TR> + <tr><td> </td><td><a href="datasourceaccess.jsp?uri=${bean.dataSourceURI}">List all resources...</a> <b>Attention</b>: may be huge</td></tr> + + </table> <h2>Basic data source information</h2> <%= fresnelEditor.createCommonFormPart() %> @@ -85,36 +114,8 @@ </table> <%} %> - <h2>Reports</h2> - <a href="datasourcereport.jsp?uri=${bean.dataSourceURI}"><b>detailed report<b></a> - <h3>Crawl report</h3> - <table > - <TR> - <TD class="rowheader">Crawl started:</TD> - <td class="configinput">${bean.crawlStartedString}</td> - </TR> - <TR> - <TD class="rowheader">Crawl stopped:</TD> - <td class="configinput">${bean.crawlStoppedString}</td> - </TR> - <TR> - <TD class="rowheader">New objects:</TD> - <td class="configinput">${bean.newObjects}</td> - </TR> - <TR> - <TD class="rowheader">Changed objects:</TD> - <td class="configinput">${bean.modifiedObjects}</td> - </TR> - <TR> - <TD class="rowheader">Unchanged objects:</TD> - <td class="configinput">${bean.unmodifiedObjects}</td> - </TR> - <TR> - <TD class="rowheader">Removed objects:</TD> - <td 
class="configinput">${bean.deletedObjects}</td> - </TR> - </table> + <h2><a href="index.jsp">Back to the data sources list</a></h2> <% bean.dispose(); } %> Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-04-18 20:35:48 UTC (rev 2324) @@ -4,7 +4,8 @@ package org.semanticdesktop.aperture.drupalhandler; import java.util.Map; -import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; import org.ontoware.aifbcommons.collection.ClosableIterator; import org.ontoware.rdf2go.model.Model; @@ -20,17 +21,14 @@ import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; import org.semanticdesktop.aperture.crawler.Crawler; import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; -import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; -import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierFactory; -import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifierRegistry; import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.rdf.util.XmlSafetyUtils; +import org.semanticdesktop.aperture.server.datasource.DataSourcePool; import org.semanticdesktop.aperture.server.impl.ApertureRegistriesAggregate; +import org.semanticdesktop.aperture.server.impl.ServerImpl; import org.semanticdesktop.aperture.vocabulary.NIE; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * A crawler handler that speaks to a drupal server via XML-RPC. 
@@ -38,10 +36,15 @@ * */ public class DrupalCrawlerHandler extends CrawlerHandlerBase { - - private static Logger logger = LoggerFactory.getLogger(DrupalCrawlerHandler.class); /** + * the default logger. + * It is returned by getLogger when no good parameters are given, + * but YOU should not use it but use getLogger() instead so that the user can see your log messages. + */ + static private Logger default_logger = Logger.getLogger(DrupalCrawlerHandler.class.getName()); + + /** * Domain needed to connect to drupal */ String domain; @@ -77,6 +80,12 @@ * one RDF container factory, can be reused as it creates in-memory rdf containers. */ private RDFContainerFactory rdfContainerFactory; + + /** + * ServerImpl to get the dataSourcePool to get the special logger for each datasource to report problems + * when handling data. + */ + private ServerImpl server; /** * This class exists because this fuckup exists: @@ -108,16 +117,34 @@ * Do not access the configmodel after the constructor is finished, rather copy everything you need from the model. * @param registries the registries to use * @param configModel the configmodel with settings. Do not access it after the constructor is finished + * @param ServerImpl to get the the dataSourcePool to get the special logger for each datasource to report problems + * when handling data. */ - public DrupalCrawlerHandler(ApertureRegistriesAggregate registries, Model configModel) throws Exception { + public DrupalCrawlerHandler(ApertureRegistriesAggregate registries, Model configModel, ServerImpl server) throws Exception { super( // TODO - use something cleverer here new MagicMimeTypeIdentifier(), registries.getExtractorRegistry(), null); readConfiguration(configModel); + this.server = server; } /** + * Get the logger for the specific datasource of the passed crawler. 
+ * @param crawler + * @return + */ + public Logger getLogger(Crawler crawler) { + try { + if ((crawler!=null)&&(crawler.getDataSource()!=null)&&(crawler.getDataSource().getID()!=null)) + return server.getDataSourcePool().getDataSourceLogger(crawler.getDataSource().getID().toString()); + } catch (Exception x) { + return default_logger; + } + return default_logger; + } + + /** * during construction: read the config * @param configModel */ @@ -194,7 +221,7 @@ try { super.processBinary(crawler, object); } catch (Exception e) { - logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + getLogger(crawler).log(Level.WARNING, "cannot extract binary from '"+object.getID()+"': "+e,e); } // push to drupal try { @@ -204,9 +231,19 @@ // convert DrupalNode node = objectToNode(object); - service.nodeSave(node); + try { + service.externaldocumentUpdate(node); + } catch (Exception x) { + // was it "not found"? + if (x.getMessage().contains(" not found")) { + // well, then warn about this and add it again + int nid = service.nodeSave(node); + getLogger(crawler).log(Level.WARNING,"Document '"+object.getID()+"' was detected as changed, but Drupal did not contain it anymore. 
It was added again with NID "+nid); + } else + throw x; + } } catch (Exception x) { - logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); + getLogger(crawler).log(Level.WARNING,"cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); } super.objectChanged(crawler, object); @@ -233,7 +270,8 @@ for (Map.Entry<String, Object> entry : node.entrySet()) { if (entry.getValue() == null) - logger.error("key "+entry.getKey()+" for entry "+object.getID().toString()+" has a <null> value."); + // this is intentionally using the static logger - it could be changed though to make this method non-static, pass in the crawler, and use getLogger() + default_logger.warning("key "+entry.getKey()+" for entry "+object.getID().toString()+" has a <null> value."); } node.put("rdfdescription", rdfAsString); return node; @@ -244,7 +282,7 @@ try { super.processBinary(crawler, object); } catch (Exception e) { - logger.warn("cannot extract binary from '"+object.getID()+"': "+e,e); + getLogger(crawler).log(Level.WARNING,"cannot extract binary from '"+object.getID()+"': "+e,e); } // push to drupal @@ -258,7 +296,7 @@ DrupalNode node = objectToNode(object); service.nodeSave(node); } catch (Exception x) { - logger.warn("cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); + getLogger(crawler).log(Level.WARNING,"cannot store extracted content from '"+object.getID()+"' to drupal: "+x,x); } super.objectNew(crawler, object); } @@ -269,7 +307,17 @@ @Override public void objectRemoved(Crawler crawler, String url) { - super.objectRemoved(crawler, url); + // push to drupal + try { + // connect + if (!connected()) + connect(); + + service.externaldocumentDelete(url, crawler.getDataSource().getID().toString()); + } catch (Exception x) { + getLogger(crawler).log(Level.WARNING,"cannot remove dataobject '"+url+"' from drupal: "+x,x); + } + } } Modified: 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java 2010-04-18 20:35:48 UTC (rev 2324) @@ -16,6 +16,7 @@ import org.semanticdesktop.aperture.extractor.word.WordExtractor; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.rdf.util.XmlSafetyUtils; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * This is an example class showing how Aperture and Drupal may work together to upload files @@ -32,8 +33,8 @@ try { // connect to drupal - DrupalXmlRpcService drupal = new DrupalXmlRpcService("localhost", - "dac5b06a2e63eed9336ce24f5e56a181", + DrupalXmlRpcService drupal = new DrupalXmlRpcService("aperture", + "97af6a61c5ca8ec6680be4458c7ad97b", "http://localhost/organikdrupal/?q=services/xmlrpc"); drupal.connect(); drupal.login("root", "root"); @@ -41,6 +42,17 @@ o = extractDataObject(); DrupalNode node = DrupalCrawlerHandler.objectToNode(o); drupal.nodeSave(node); + + // now update this node + o.getMetadata().put(NIE.title, "changed !"); + node = DrupalCrawlerHandler.objectToNode(o); + int nidupdate = drupal.externaldocumentUpdate(node); + System.out.println("updated node with ID "+nidupdate); + + // now delete the node + int niddelete = drupal.externaldocumentDelete(o.getID().toString(), o.getDataSource().getID().toString()); + + System.out.println("deleted node with ID "+niddelete); } finally { drupal.logout(); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java =================================================================== --- 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalNode.java 2010-04-18 20:35:48 UTC (rev 2324) @@ -19,12 +19,19 @@ */ public static final String TYPE_STORY = "story"; + /** + * Properties of a Node. + * Taken from + */ public static String NID = "nid"; public static String TYPE = "type"; public static String LANGUAGE = "language"; public static String UID = "uid"; public static String STATUS = "status"; public static String CREATED = "created"; + /** + * Set changed when saving existing nodes (=update) + */ public static String CHANGED = "changed"; public static String TITLE = "title"; public static String BODY = "body"; Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-04-18 20:35:48 UTC (rev 2324) @@ -52,6 +52,8 @@ public static final String MethodUserLogin = "user.login"; public static final String MethodFileSave = "file.save"; public static final String MethodTestCount = "test.count"; + public static final String MethodExternalDocumentUpdate = "externaldocument.update"; + public static final String MethodExternalDocumentDelete = "externaldocument.delete"; Logger log = Logger.getLogger(DrupalXmlRpcService.class.getName()); @@ -235,23 +237,99 @@ * Call node.save * @param node the node to save */ - public void nodeSave(DrupalNode node) throws Exception { + public int nodeSave(DrupalNode node) throws Exception { Vector<Object> params = generateDefaultParams(MethodNodeSave); params.add(node); Object o; try { o = xmlRpcClient.execute(MethodNodeSave, 
params); } catch (XmlRpcExtensionException x) { - // was it a null value? - // stupidity check - for (Map.Entry<String, Object> entry : node.entrySet()) - { - if (entry.getValue() == null) - log.severe("key "+entry.getKey()+" has a <null> value."); - } + checkNullValue(x, node); throw x; } if (log.isLoggable(Level.FINEST)) log.finest(MethodNodeSave+" returned "+o.toString()); + return methodReturnInteger(MethodNodeSave, o); } + + + /** + * In case a NULL value exception was thrown, what caused it? + * @param x + */ + private void checkNullValue(XmlRpcExtensionException x, DrupalNode node) { + // was it a null value? + // stupidity check + for (Map.Entry<String, Object> entry : node.entrySet()) + { + if (entry.getValue() == null) + log.severe("key "+entry.getKey()+" has a <null> value."); + } + } + + + /** + * This method call is provided on the server by + * https://organik.opendfki.de/repos/trunk/drupal/contributions/modules/organik_aperture + * @param node must have URL and datasource set. + * @return the updated nid + */ + public int externaldocumentUpdate(DrupalNode node) throws Exception { + Vector<Object> params = generateDefaultParams(MethodExternalDocumentUpdate); + params.add(node); + Object o; + try { + o = xmlRpcClient.execute(MethodExternalDocumentUpdate, params); + } catch (XmlRpcExtensionException x) { + checkNullValue(x, node); + throw x; + } + if (log.isLoggable(Level.FINEST)) + log.finest(MethodExternalDocumentUpdate+" returned "+o.toString()); + return methodReturnInteger(MethodExternalDocumentUpdate, o); + } + + /** + * convert a return parameter to integer. 
the method otherwise was successful, so say this in the exception + * @param methodname the called method name + * @param o the returned object + * @return o as integer + * @throws Exception if the conversion fails + */ + private int methodReturnInteger(String methodname, + Object o) throws Exception { + if (o instanceof Integer) + return (Integer)o; + else if (o instanceof String) + try { + return Integer.parseInt((String)o); + } catch (NumberFormatException x) { + throw new Exception(methodname+" completed successfully, but returned not a number but "+o); + } + else { + if (o == null) + throw new Exception(methodname+" completed successfully, but returned NULL"); + else + throw new Exception(methodname+" completed successfully, but returned not an integer but "+o+" of type "+o.getClass().getName()); + } + } + + + /** + * This method call is provided on the server by + * https://organik.opendfki.de/repos/trunk/drupal/contributions/modules/organik_aperture + * @param uri the uri of the dataobject + * @param datasource the datasource of the dataobject to delete + * @return the deleted nid + */ + public int externaldocumentDelete(String uri, String datasource) throws Exception { + Vector<Object> params = generateDefaultParams(MethodExternalDocumentDelete); + params.add(uri); + params.add(datasource); + Object o; + o = xmlRpcClient.execute(MethodExternalDocumentDelete, params); + if (log.isLoggable(Level.FINEST)) + log.finest(MethodExternalDocumentDelete+" returned "+o.toString()); + return methodReturnInteger(MethodExternalDocumentDelete, o); + } } \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-04-18 17:02:56 UTC (rev 2323) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-04-18 
20:35:48 UTC (rev 2324) @@ -1439,7 +1439,7 @@ private void loadCrawlerHandler() { try { apertureServerCrawlerHandler = new DrupalCrawlerHandler(this.apertureRegistries, - configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI)); + configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI), this); } catch (Exception x) { log.log(Level.WARNING, "Cannot initialize Crawler Handler: "+x, x); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <leo...@us...> - 2010-06-16 07:23:31
|
Revision: 2355 http://aperture.svn.sourceforge.net/aperture/?rev=2355&view=rev Author: leo_sauermann Date: 2010-06-16 07:23:24 +0000 (Wed, 16 Jun 2010) Log Message: ----------- aperture-webserver: handling of user.uid (does not work 100%, uid is ignored by drupal currently), fixed bug that stopped reloaded crawlerhandler from working (handler was wrongly buffered by the ApertureCrawlerFactory) Modified Paths: -------------- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java Added Paths: ----------- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFITERATORDATASOURCECONST.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawler.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerFactory.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerTest.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSource.java 
aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSourceFactory.java aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratordatasource.ttl aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratortestdata.ttl aperture-webserver/trunk/test/ aperture-webserver/trunk/test/resources/ aperture-webserver/trunk/test/resources/test.doc Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFITERATORDATASOURCECONST.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFITERATORDATASOURCECONST.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFITERATORDATASOURCECONST.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,23 @@ +/** + * + */ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; + +/** + * Static constants for the RDF iterator data source. + * These should be auto-generated, but Leo does not know this offline moment sitting in IC2064 how to do that. 
+ * @author leo.sauermann + * + */ +public class RDFITERATORDATASOURCECONST { + public static final String resourcefile = "rdfiteratordatasource.ttl"; + public static final String NS = "http://aperture.semanticdesktop.org/ontology/2010/04/27/rdfiteratords#"; + public static final URI RDFIteratorDataSource = new URIImpl(NS+"RDFIteratorDataSource"); + public static final URI filename = new URIImpl(NS+"filename"); + public static final URI ignoreClass = new URIImpl(NS+"ignoreClass"); + + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawler.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawler.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,295 @@ +/** + * + */ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import java.io.File; +import java.io.FileInputStream; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; + +import org.ontoware.aifbcommons.collection.ClosableIterator; +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.QueryResultTable; +import org.ontoware.rdf2go.model.QueryRow; +import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.model.node.Literal; +import org.ontoware.rdf2go.model.node.Node; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.Variable; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.ontoware.rdf2go.util.RDFTool; +import org.ontoware.rdf2go.vocabulary.RDF; +import org.ontoware.rdf2go.vocabulary.RDFS; +import org.semanticdesktop.aperture.accessor.AccessData; +import 
org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.base.DataObjectBase; +import org.semanticdesktop.aperture.crawler.ExitCode; +import org.semanticdesktop.aperture.crawler.base.CrawlerBase; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.vocabulary.DATASOURCE; +import org.semanticdesktop.aperture.vocabulary.FRESNEL; +import org.semanticdesktop.aperture.vocabulary.GEO; +import org.semanticdesktop.aperture.vocabulary.NAO; +import org.semanticdesktop.aperture.vocabulary.NCAL; +import org.semanticdesktop.aperture.vocabulary.NCO; +import org.semanticdesktop.aperture.vocabulary.NEXIF; +import org.semanticdesktop.aperture.vocabulary.NFO; +import org.semanticdesktop.aperture.vocabulary.NID3; +import org.semanticdesktop.aperture.vocabulary.NIE; +import org.semanticdesktop.aperture.vocabulary.NMO; +import org.semanticdesktop.aperture.vocabulary.SOURCEFORMAT; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A crawler which iterates through an RDF file, searching for + * NIE:InformationElements to crawl and reporting those. + * <h3>RDF model partition</h3> + * The input RDF model must be partitioned into individual DataObjects and + * RDFContainers to be passed on. This partitioning is done along property axes, + * whereas S-P-O patterns are followed to O as long as P is not rdf:type or + * rdfs:seeAlso. + * + * @author leo.sauermann + * + */ +public class RDFIteratorCrawler extends CrawlerBase { + + private Logger logger = LoggerFactory.getLogger(getClass()); + + /** + * List of classes that are subclasses of nie:InformationElement. + * This is precomputed before each crawl, and it is based on an arbitrary selection from the core + * ontologies. You may be surprised that this may stop your file from being parsed, + * if you used arbitrary subclasses. + * It can be limited by using the {@link RDFITERATORDATASOURCECONST#ignoreClass} parameter. 
+ */ + Collection<URI> informationElementClasses; + + /** + * + */ + public RDFIteratorCrawler() { + } + + /* + * (non-Javadoc) + * + * @see org.semanticdesktop.aperture.crawler.base.CrawlerBase#crawlObjects() + */ + @Override + protected ExitCode crawlObjects() { + // dummy-check datasource + RDFIteratorDataSource ds; + if (getDataSource() instanceof RDFIteratorDataSource) + ds = ((RDFIteratorDataSource) getDataSource()); + else { + ds = new RDFIteratorDataSource(); + ds.setConfiguration(getDataSource().getConfiguration()); + } + + // open file + File f = new File(ds.getFilename()); + if (!f.exists()) + // this is bad and we can stop here + // TODO: replace with "correct" Exception + throw new RuntimeException("File not found: "+f); + + // note: syntax guessing does not work as expected, hence some hacks here. + Syntax syntax = RDFTool.guessSyntax(f.getName()); + if (f.getName().toLowerCase().endsWith(".ttl")) + syntax = Syntax.Turtle; + if (syntax == null) + throw new RuntimeException("Cannot detect RDF syntax of filename "+f); + + Model model = RDF2Go.getModelFactory().createModel(); + model.open(); + try { + FileInputStream in = new FileInputStream(f); + try { + model.readFrom(in, syntax); + } finally { + in.close(); + } + // are we stopped already? 
+ if (stopRequested) + return ExitCode.STOP_REQUESTED; + // sparql through + return iterateThrough(model); + } catch (Exception x) { + // TODO: replace with "correct" Exception + throw new RuntimeException(x); + } finally { + model.close(); + } + } + + /** + * Iterate through all distinct resources in the model which are subclasses of nie:InformationElement + * @param model the model to iterate through + * @return ExitCode for crawling + */ + protected ExitCode iterateThrough(Model model) { + String queryString = null; + for (URI cl : getInformationElementClasses()) { + if (queryString != null) + queryString += " UNION "; + else queryString = ""; + queryString += " {?r rdf:type "+cl.toSPARQL()+"}\n"; + } + queryString = + "PREFIX rdf: <" + RDF.RDF_NS + "> \n" + + "SELECT DISTINCT ?r WHERE {" + queryString+" }"; + QueryResultTable res = model.sparqlSelect(queryString); + for (ClosableIterator<QueryRow> i = res.iterator(); i.hasNext(); ) + { + // are we stopped already? + if (stopRequested) { + i.close(); + return ExitCode.STOP_REQUESTED; + } + Node v = i.next().getValue("r"); + if (!(v instanceof Resource)) { + logger.debug("will not crawl blank node "+v.toSPARQL()); + continue; + } + URI u = v.asURI(); + // this is the URI to crawl now + crawlUri(model, u); + } + return ExitCode.COMPLETED; + } + + /** + * Crawl this uri from the passed model + * @param model the model to read from + * @param u the resource URI to evaluate for crawling + */ + private void crawlUri(Model model, URI u) { + reportAccessingObject(u.toString()); + // TODO: change detection. This is going to be a PITA and involve hashing RDF. I leave it for the interested reader. + // was the URI there before? 
+ String datevalue = accessData.get(u.toString(), AccessData.DATE_KEY); + if (datevalue == null) { + // new + DataObject dataobject = extractDataObject(model, u); + reportNewDataObject(dataobject); + accessData.put(u.toString(), AccessData.DATE_KEY, Long.toString(System.currentTimeMillis())); + } else { + // changed or unchanged, I don't know + DataObject dataobject = extractDataObject(model, u); + reportModifiedDataObject(dataobject); + accessData.put(u.toString(), AccessData.DATE_KEY, Long.toString(System.currentTimeMillis())); + } + } + + /** + * Extract the passed URI as dataobject from the passed model. + * Follow all links to the right (S-P-O) but not type-links. + * Avoid endless loops. + * Basically: get a CBD, including resources, to the right, no loop, no type metadata. + * @param model to crawl + * @param u the uri to extract + * @return an extracted model + */ + private DataObject extractDataObject(Model model, URI u) { + HashSet<URI> stop = new HashSet<URI>(); + RDFContainer metadata = getRDFContainerFactory(u.toString()).getRDFContainer(u); + DataObject result = new DataObjectBase(u, getDataSource(), metadata); + extractDataObjectRec(model, u, metadata, stop); + return result; + } + + /** + * recursively extract all statements from model m into metadata but stop at URIs contained in stop. + * @param model + * @param u + * @param metadata + * @param stop + */ + private void extractDataObjectRec(Model model, URI u, + RDFContainer metadata, HashSet<URI> stop) { + for (ClosableIterator<Statement> i = model.findStatements(u, Variable.ANY, Variable.ANY); i.hasNext();) { + Statement s = i.next(); + metadata.getModel().addStatement(s); + // recurse? 
see class documentation where this stops + Node o = s.getObject(); + if ((o instanceof URI) && (!RDF.type.equals(s.getPredicate())) && !stop.contains(o)) { + URI ou = o.asURI(); + stop.add(ou); + extractDataObjectRec(model, ou, metadata, stop); + } + } + } + + /** + * Get all pre-known subclasses of InformationElement + * @return + */ + protected Collection<URI> getInformationElementClasses() { + if (informationElementClasses == null) { + informationElementClasses = new HashSet<URI>(); + Model m = RDF2Go.getModelFactory().createModel(); + m.open(); + try { + DATASOURCE.getDATASOURCEOntology(m); + FRESNEL.getFRESNELOntology(m); + GEO.getGEOOntology(m); + NAO.getNAOOntology(m); + NCAL.getNCALOntology(m); + NCO.getNCOOntology(m); + NEXIF.getNEXIFOntology(m); + NFO.getNFOOntology(m); + NID3.getNID3Ontology(m); + NIE.getNIEOntology(m); + NMO.getNMOOntology(m); + SOURCEFORMAT.getSOURCEFORMATOntology(m); + // find all subclasses of nie:InformationElement + HashSet<URI> tocheck = new HashSet<URI>(); + tocheck.add(NIE.InformationElement); + while (!tocheck.isEmpty()) { + // emulating "pop" + Iterator<URI> i = tocheck.iterator(); + URI cur = i.next(); + i.remove(); + + // check + for (ClosableIterator<Statement> subcli = m.findStatements(Variable.ANY, RDFS.subClassOf, cur); subcli.hasNext(); ) { + Statement subcls = subcli.next(); + if (!(subcls.getSubject() instanceof URI)) + continue; + URI subcl = subcls.getSubject().asURI(); + if (!informationElementClasses.contains(subcl)) { + informationElementClasses.add(subcl); + tocheck.add(subcl); + } + } + } + // now remove those which we want to ignore + for (Object o : getDataSource().getConfiguration().getAll(RDFITERATORDATASOURCECONST.ignoreClass)) { + if (o instanceof Literal) { + // well, we are ok with that, the GUI may have fucked this up + try { + URIImpl u = new URIImpl(((Literal)o).getValue()); + informationElementClasses.remove(u); + } catch (Exception x) { + logger.debug("cannot ignore "+o+".", x); + } + } else if (o 
instanceof URI) { + informationElementClasses.remove((URI)o); + } else + logger.debug("cannot ignore "+o+", it is neither URI nor literal."); + } + } finally { + m.close(); + } + } + return informationElementClasses; + } + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerFactory.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerFactory.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerFactory.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,34 @@ +/** + * + */ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.crawler.Crawler; +import org.semanticdesktop.aperture.crawler.CrawlerFactory; +import org.semanticdesktop.aperture.datasource.DataSource; + +/** + * @author leo.sauermann + * + */ +public class RDFIteratorCrawlerFactory implements CrawlerFactory { + + public Crawler getCrawler(DataSource dataSource) { + RDFIteratorCrawler crawler = new RDFIteratorCrawler(); + crawler.setDataSource(dataSource); + return crawler; + } + + /* (non-Javadoc) + * @see org.semanticdesktop.aperture.crawler.CrawlerFactory#getSupportedTypes() + */ + public Set getSupportedTypes() { + return Collections.singleton(RDFITERATORDATASOURCECONST.RDFIteratorDataSource); + } + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerTest.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerTest.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorCrawlerTest.java 
2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,147 @@ +/** + * + */ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import junit.framework.TestCase; + +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.base.ModelAccessData; +import org.semanticdesktop.aperture.crawler.Crawler; +import org.semanticdesktop.aperture.crawler.base.CrawlerHandlerBase; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.vocabulary.NCO; +import org.semanticdesktop.aperture.vocabulary.NIE; + +/** + * @author leo.sauermann + * + */ +public class RDFIteratorCrawlerTest extends TestCase { + + public static final String TESTDATA_NS = "http://www.exmaple.org/crawl/"; + + public class TestCrawlerHandler extends CrawlerHandlerBase { + + private String getLocalname(DataObject o) { + return o.getID().toString().substring(TESTDATA_NS.length()); + } + + private void checkObject(DataObject object) { + // is this ok? 
+ String ln = getLocalname(object); + if ("d1".equals(ln)) { + assertEquals("First title", object.getMetadata().getString(NIE.title)); + assertEquals("The first file", object.getMetadata().getString(NIE.plainTextContent)); + // iterate further + URI creator = object.getMetadata().getURI(NCO.creator); + assertEquals(TESTDATA_NS+"c1", creator.toString()); + RDFContainerImpl creatorC = new RDFContainerImpl(object.getMetadata().getModel(), creator); + assertEquals("John Doe", creatorC.getString(NCO.fullname)); + + } else + throw new RuntimeException("hey, this dataobject should not be reported: "+object); + } + + @Override + public void objectChanged(Crawler crawler, DataObject object) { + checkObject(object); + super.objectChanged(crawler, object); + } + + @Override + public void objectNew(Crawler crawler, DataObject object) { + checkObject(object); + super.objectNew(crawler, object); + } + + + } + + ModelAccessData accessData; + Model accessDataModel; + RDFIteratorCrawler crawler; + RDFIteratorDataSource datasource; + TestCrawlerHandler handler; + + /** + * + */ + public RDFIteratorCrawlerTest() { + } + + /** + * @param name + */ + public RDFIteratorCrawlerTest(String name) { + super(name); + } + + @Override + protected void setUp() throws Exception { + super.setUp(); + crawler = new RDFIteratorCrawler(); + + // Datasource + datasource = new RDFIteratorDataSource(); + Model cfgmodel = RDF2Go.getModelFactory().createModel(); + cfgmodel.open(); + URIImpl cfgUri = new URIImpl("urn:dscfg"); + RDFContainer datasourceCfg = new RDFContainerImpl(cfgmodel, cfgUri); + datasource.setConfiguration(datasourceCfg); + // ahem, this is dodgy + datasource.setFilename("src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratortestdata.ttl"); + datasourceCfg.add(RDFITERATORDATASOURCECONST.ignoreClass, NCO.Contact); + + // AccessData + accessDataModel = RDF2Go.getModelFactory().createModel(); + accessDataModel.open(); + accessData = new ModelAccessData(accessDataModel); + + // 
Handler + handler = new TestCrawlerHandler(); + + // bind all to crawler + crawler.setAccessData(accessData); + crawler.setCrawlerHandler(handler); + crawler.setDataSource(datasource); + } + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + accessData = null; + accessDataModel.close(); + accessDataModel = null; + datasource.dispose(); + datasource = null; + crawler = null; + handler = null; + } + + public void testCrawl() { + crawler.crawl(); + assertEquals(0, crawler.getCrawlReport().getChangedCount()); + assertEquals(1, crawler.getCrawlReport().getNewCount()); + assertEquals(0, crawler.getCrawlReport().getRemovedCount()); + assertEquals(0, crawler.getCrawlReport().getUnchangedCount()); + crawler.crawl(); + assertEquals(1, crawler.getCrawlReport().getChangedCount()); + assertEquals(0, crawler.getCrawlReport().getNewCount()); + assertEquals(0, crawler.getCrawlReport().getRemovedCount()); + assertEquals(0, crawler.getCrawlReport().getUnchangedCount()); + crawler.crawl(); + assertEquals(1, crawler.getCrawlReport().getChangedCount()); + assertEquals(0, crawler.getCrawlReport().getNewCount()); + assertEquals(0, crawler.getCrawlReport().getRemovedCount()); + assertEquals(0, crawler.getCrawlReport().getUnchangedCount()); + } + + + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSource.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSource.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSource.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,38 @@ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import java.util.Collection; + +import org.ontoware.rdf2go.model.node.Node; +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.datasource.base.DataSourceBase; + +/** + * 
+ * @author leo.sauermann + * + */ +public class RDFIteratorDataSource extends DataSourceBase { + + public URI getType() { + return RDFITERATORDATASOURCECONST.RDFIteratorDataSource; + } + + public String getFilename() { + return getConfiguration().getString(RDFITERATORDATASOURCECONST.filename); + } + + public void setFilename(String value) { + getConfiguration().put(RDFITERATORDATASOURCECONST.filename, value); + } + public Collection getIgnoreClass() { + return getConfiguration().getAll(RDFITERATORDATASOURCECONST.ignoreClass); + } + public void setIgnoreClass(Collection<Node> value) { + getConfiguration().remove(RDFITERATORDATASOURCECONST.ignoreClass); + for (Node n : value) { + getConfiguration().add(RDFITERATORDATASOURCECONST.ignoreClass, n); + } + } + + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSourceFactory.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSourceFactory.java (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/RDFIteratorDataSourceFactory.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,42 @@ +/** + * + */ +package org.semanticdesktop.aperture.crawler.rdfiterator; + +import java.io.InputStream; + +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.datasource.DataSourceFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author leo.sauermann + * + */ +public class RDFIteratorDataSourceFactory implements DataSourceFactory { + + private Logger logger = LoggerFactory.getLogger(getClass()); + + public boolean getDescription(Model model) { + try { + InputStream in = getClass().getResourceAsStream(RDFITERATORDATASOURCECONST.resourcefile); + 
model.readFrom(in, Syntax.Turtle); + } catch (Exception x) { + logger.warn("cannot load "+RDFITERATORDATASOURCECONST.resourcefile+". This may cause this datasource not to be configurable.", x); + } + return false; + } + + public URI getSupportedType() { + return RDFITERATORDATASOURCECONST.RDFIteratorDataSource; + } + + public DataSource newInstance() { + return new RDFIteratorDataSource(); + } + +} Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratordatasource.ttl =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratordatasource.ttl (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratordatasource.ttl 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,46 @@ +@prefix fresnel: <http://www.w3.org/2004/09/fresnel#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +@prefix sourceformat: <http://aperture.semanticdesktop.org/ontology/sourceformat#> . +@prefix source: <http://aperture.semanticdesktop.org/ontology/2007/08/12/source#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . +@prefix : <http://aperture.semanticdesktop.org/ontology/2010/04/27/rdfiteratords#> . + +:RDFIteratorDataSource a rdfs:Class ; + rdfs:subClassOf nie:DataSource ; + rdfs:label "RDF Data Source" ; + rdfs:comment "Crawl aperture-friendly NIE data in RDF. Uses a file as input." . + +:RDFIteratorDataSourceLens rdf:type fresnel:Lens ; + fresnel:purpose fresnel:defaultLens ; + fresnel:classLensDomain :RDFIteratorDataSource ; + fresnel:showProperties ( :filename + :ignoreClass + ) . + +:filename a rdf:Property ; + rdfs:label "filename" ; + rdfs:comment "The full path of the RDF file to crawl. 
Note: the file must be encoded in UTF-8 and end with a registered RDF file extension (.rdf, .ttl, .nt, .trix, .trig)." ; + rdfs:domain :RDFIteratorDataSource ; + rdfs:range xsd:string . + +:filenameFormat a fresnel:Format ; + sourceformat:valueWidget [ + a sourceformat:TextFieldWidget + ] ; + fresnel:propertyFormatDomain :filename . + + +:ignoreClass a rdf:Property ; + rdfs:label "ignore class" ; + rdfs:comment "Ignore the listed RDF classes when returning resources" ; + rdfs:domain :RDFIteratorDataSource ; + rdfs:range rdfs:Class . + +:ignoreClassFormat a fresnel:Format ; + sourceformat:valueWidget [ + a sourceformat:MultipleTextFieldWidget + ] ; + fresnel:propertyFormatDomain :ignoreClass . Added: aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratortestdata.ttl =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratortestdata.ttl (rev 0) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/crawler/rdfiterator/rdfiteratortestdata.ttl 2010-06-16 07:23:24 UTC (rev 2355) @@ -0,0 +1,19 @@ +@prefix fresnel: <http://www.w3.org/2004/09/fresnel#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +@prefix sourceformat: <http://aperture.semanticdesktop.org/ontology/sourceformat#> . +@prefix source: <http://aperture.semanticdesktop.org/ontology/2007/08/12/source#> . +@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> . +@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#> . +@prefix nco: <http://www.semanticdesktop.org/ontologies/2007/03/22/nco#> . + +@prefix : <http://www.exmaple.org/crawl/> . + +:d1 a nfo:Document; + nie:title "First title"; + nco:creator :c1; + nie:plainTextContent "The first file". +:c1 a nco:Contact; + nco:fullname "John Doe". 
+ \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DRUPALCRAWLERHANDLERCONFIG.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -18,6 +18,7 @@ public static final URI configDomain = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configDomain"); public static final URI configApikey = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configApikey"); public static final URI configUsername = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configUsername"); + public static final URI configUserID = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configUserID"); public static final URI configPassword = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configPassword"); public static final URI configXMLRPCServiceUrl = new URIImpl("http://aperture.semanticdesktop.org/ontology/2010/03/23/drupalcrawlerhandler#configXMLRPCServiceUrl"); Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerHandler.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -68,6 +68,12 @@ String username; /** + * userid for uploading content. 
see {@link DRUPALCRAWLERHANDLERCONFIG#configUserID} + * may be null when not configured properly. Then the uploading user is "anonymous" + */ + Integer userid; + + /** * password for auth */ String password; @@ -161,12 +167,15 @@ it.close(); } - this.domain = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configDomain); - this.apikey = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configApikey); - this.serviceUrl = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configXMLRPCServiceUrl); - this.username = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configUsername); - this.password = RDFTool.getSingleValueString(configModel, configR, DRUPALCRAWLERHANDLERCONFIG.configPassword); + RDFContainerImpl conf = new RDFContainerImpl(configModel, configR.asURI()); + this.domain = conf.getString(DRUPALCRAWLERHANDLERCONFIG.configDomain); + this.apikey = conf.getString(DRUPALCRAWLERHANDLERCONFIG.configApikey); + this.serviceUrl = conf.getString(DRUPALCRAWLERHANDLERCONFIG.configXMLRPCServiceUrl); + this.username = conf.getString(DRUPALCRAWLERHANDLERCONFIG.configUsername); + this.userid = conf.getInteger(DRUPALCRAWLERHANDLERCONFIG.configUserID); + this.password = conf.getString(DRUPALCRAWLERHANDLERCONFIG.configPassword); + // TODO: check config? 
setExtractingContents(true); @@ -175,12 +184,13 @@ public DrupalCrawlerHandler(String domain, String apikey, - String serviceUrl, String username, String password) { + String serviceUrl, String username, Integer userid, String password) { super(); this.domain = domain; this.apikey = apikey; this.serviceUrl = serviceUrl; this.username = username; + this.userid = userid; this.password = password; setExtractingContents(true); } @@ -250,7 +260,7 @@ super.objectChanged(crawler, object); } - protected static DrupalNode objectToNode(DataObject object) { + protected DrupalNode objectToNode(DataObject object) { RDFContainer data = object.getMetadata(); DrupalNode node = new DrupalNode(); node.setType("externaldocument"); // required by aperture to store URL and datasource @@ -265,6 +275,9 @@ node.setBody(body); node.put("URL", object.getID().toString()); node.put("datasource", object.getDataSource().getID().toString()); + // node.put("owner_name", username); --> this is ignored on the drupal side, therefore we need the userID + if (userid!=null) + node.put("uid", userid.toString()); // rdf-ify it String rdfAsString = RDFTool.modelToString(data.getModel(), Syntax.RdfXml); // stupidity check Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalCrawlerUploadExample.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -29,29 +29,46 @@ * @param args */ public static void main(String[] args) throws Exception { + String domain = "aperture"; + String apikey = "041a913336c34858cf77efa6164fe074"; + String serviceUrl = "http://localhost/organikdrupal/?q=services/xmlrpc"; + String username = "root"; + Integer userid = 3; + String password = "root"; 
+ DataObject o=null; try { // connect to drupal - DrupalXmlRpcService drupal = new DrupalXmlRpcService("aperture", - "97af6a61c5ca8ec6680be4458c7ad97b", - "http://localhost/organikdrupal/?q=services/xmlrpc"); + DrupalXmlRpcService drupal = new DrupalXmlRpcService(domain, + apikey, serviceUrl); drupal.connect(); - drupal.login("root", "root"); + drupal.login(username, password); try { o = extractDataObject(); - DrupalNode node = DrupalCrawlerHandler.objectToNode(o); - drupal.nodeSave(node); + DrupalCrawlerHandler handler = new DrupalCrawlerHandler(domain, apikey, serviceUrl, username, userid, password ); + DrupalNode node = handler.objectToNode(o); + int saved = drupal.nodeSave(node); + System.out.println("saved node as "+saved); + // read + DrupalNode returned = drupal.nodeGet(saved); + System.out.println("returned node: "+returned.getNid()+": "+returned.getTitle()+ + " uid:"+returned.get("uid")); + // now update this node o.getMetadata().put(NIE.title, "changed !"); - node = DrupalCrawlerHandler.objectToNode(o); + node = handler.objectToNode(o); int nidupdate = drupal.externaldocumentUpdate(node); System.out.println("updated node with ID "+nidupdate); + // read + returned = drupal.nodeGet(nidupdate); + System.out.println("returned node: "+returned.getNid()+": "+returned.getTitle()+ + " uid:"+returned.get("uid")); + // now delete the node - int niddelete = drupal.externaldocumentDelete(o.getID().toString(), o.getDataSource().getID().toString()); - + int niddelete = drupal.externaldocumentDelete(o.getID().toString(), o.getDataSource().getID().toString()); System.out.println("deleted node with ID "+niddelete); } finally { drupal.logout(); @@ -65,7 +82,7 @@ public static DataObject extractDataObject() throws Exception { System.out.print("Extracting Data Object ... 
"); - File f = new File("C:/1/test.doc"); + File f = new File("test/resources/test.doc"); URI uri = new URIImpl( f.toURI().toASCIIString()); WordExtractor extractor = new WordExtractor(); Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/DrupalXmlRpcService.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -50,6 +50,7 @@ * @author sauermann */ public static final String MethodNodeSave = "node.save"; + public static final String MethodNodeGet = "node.get"; public static final String MethodSystemConnect = "system.connect"; public static final String MethodUserLogout = "user.logout"; public static final String MethodUserLogin = "user.login"; @@ -354,4 +355,23 @@ log.finest(MethodExternalDocumentDelete+" returned "+o.toString()); return methodReturnInteger(MethodExternalDocumentDelete, o); } + + + /** + * Get the node with the passed nid + * @param nid + */ + public DrupalNode nodeGet(int nid) throws Exception { + Vector<Object> params = generateDefaultParams(MethodNodeGet); + params.add(nid); + Object o; + o = xmlRpcClient.execute(MethodNodeGet, params); + if (log.isLoggable(Level.FINEST)) + log.finest(MethodNodeGet+" returned "+o.toString()); + // result should be a struct + if (!(o instanceof Map)) + throw new ClassCastException("Cannot convert returned node to a Map: "+o); + return new DrupalNode((Map)o); + + } } \ No newline at end of file Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl 2010-06-11 10:00:20 UTC (rev 
2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/drupalhandler/drupalcrawlerhandler.ttl 2010-06-16 07:23:24 UTC (rev 2355) @@ -35,6 +35,12 @@ rdfs:comment "Username used to log into the Drupal Server and add content." ; rdfs:domain :DrupalCrawlerHandler ; rdfs:range xsd:string . + +:configUserID a rdf:Property ; + rdfs:label "userid" ; + rdfs:comment "Drupal User ID used to mark uploaded content added to the Drupal Server." ; + rdfs:domain :DrupalCrawlerHandler ; + rdfs:range xsd:integer . :configPassword a rdf:Property ; rdfs:label "password" ; Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ApertureCrawlerFactory.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -62,9 +62,6 @@ /** The data source pool */ private DataSourcePool dataSources; - /** The crawler handler */ - private CrawlerHandler handler; - /** * The main constructor. * @param server The parent Aperture Server instance. 
@@ -73,7 +70,6 @@ this.server = server; this.apertureRegistries = server.getApertureRegistries(); this.dataSources = server.getDataSourcePool(); - this.handler = server.getHandler(); } /** @@ -94,7 +90,7 @@ crawler.setAccessData(accessData); crawler.setDataAccessorRegistry(apertureRegistries .getDataAccessorRegistry()); - crawler.setCrawlerHandler(handler); + crawler.setCrawlerHandler(server.getHandler()); // ugly hack, the WebCrawler requires a MimeTypeIdentifier and a // LinkExtractor registry, these methods are available only in Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/server/impl/ServerImpl.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -1440,6 +1440,7 @@ try { apertureServerCrawlerHandler = new DrupalCrawlerHandler(this.apertureRegistries, configurationModelSet.getModel(APERTURESERVER_CRAWLERHANDLERCONTEXTURI), this); + log.log(Level.CONFIG, "Loaded crawler handler configuration."); } catch (Exception x) { log.log(Level.WARNING, "Cannot initialize Crawler Handler: "+x, x); } Modified: aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java =================================================================== --- aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java 2010-06-11 10:00:20 UTC (rev 2354) +++ aperture-webserver/trunk/src/org/semanticdesktop/aperture/servlet/ApertureServerServlet.java 2010-06-16 07:23:24 UTC (rev 2355) @@ -19,6 +19,8 @@ import org.ontoware.rdf2go.RDF2Go; import org.semanticdesktop.aperture.accessor.impl.DefaultDataAccessorRegistry; import org.semanticdesktop.aperture.crawler.impl.DefaultCrawlerRegistry; +import 
org.semanticdesktop.aperture.crawler.rdfiterator.RDFIteratorCrawlerFactory; +import org.semanticdesktop.aperture.crawler.rdfiterator.RDFIteratorDataSourceFactory; import org.semanticdesktop.aperture.datasource.impl.DefaultDataSourceRegistry; import org.semanticdesktop.aperture.detector.impl.DefaultDataSourceDetectorRegistry; import org.semanticdesktop.aperture.hypertext.linkextractor.impl.DefaultLinkExtractorRegistry; @@ -156,6 +158,10 @@ //registries.setTrustDeciderRegistry(??) //registries.setTrustManagerRegistry(??) + // hack + registries.getCrawlerRegistry().add(new RDFIteratorCrawlerFactory()); + registries.getDataSourceRegistry().add(new RDFIteratorDataSourceFactory()); + server = new ServerImpl(parameters, registries, RDF2Go .getModelFactory()); Added: aperture-webserver/trunk/test/resources/test.doc =================================================================== (Binary files differ) Property changes on: aperture-webserver/trunk/test/resources/test.doc ___________________________________________________________________ Added: svn:mime-type + application/octet-stream This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |