From: <jen...@us...> - 2010-07-21 17:55:31
|
Revision: 2214 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2214&view=rev Author: jenslehmann Date: 2010-07-21 17:55:24 +0000 (Wed, 21 Jul 2010) Log Message: ----------- wrote small script to find SPARQL endpoints on the web Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/utilities/Files.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/SparqlEndpointFinder.java Added: trunk/src/dl-learner/org/dllearner/scripts/SparqlEndpointFinder.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SparqlEndpointFinder.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/SparqlEndpointFinder.java 2010-07-21 17:55:24 UTC (rev 2214) @@ -0,0 +1,83 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.dllearner.utilities.Files; + +/** + * Tries to find some public SPARQL endpoints on the web. 
+ * + * @author Jens Lehmann + * + */ +public class SparqlEndpointFinder { + + public static void main(String[] args) throws MalformedURLException, IOException { + SparqlEndpointFinder finder = new SparqlEndpointFinder(); + System.out.println(finder.find()); + } + + public List<String> find() throws MalformedURLException, IOException { + // the W3 wiki SPARQL endpoints page is often updated + String str = Files.readFile(new URL("http://esw.w3.org/index.php?title=SparqlEndpoints&action=edit")); + // typical wiki syntax: [http://dbtune.org/bbc/peel/sparql endpoint] + Pattern p = Pattern.compile("\\[(.+?) endpoint\\]"); + Matcher m = p.matcher(str); + + List<String> endpoints = new LinkedList<String>(); + while (m.find()) { + String endpoint = m.group(1); + if(SparqlEndpointFinder.validateEndpoint(endpoint)) { + endpoints.add(endpoint); + } + } + + // TODO: find more endpoints e.g. at http://www.freebase.com/view/user/bio2rdf/public/sparql + // or using voiD files + + // TODO: find graphs + + return endpoints; + } + + public static boolean validateEndpoint(String str) { +// URL url = null; + try { +// url = new URL(str); + new URL(str); + } catch (MalformedURLException e) { + return false; + } + // TODO: send example query to check whether endpoint is alive + return true; + } + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/Files.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/Files.java 2010-07-21 17:27:28 UTC (rev 2213) +++ trunk/src/dl-learner/org/dllearner/utilities/Files.java 2010-07-21 17:55:24 UTC (rev 2214) @@ -28,8 +28,10 @@ import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.net.URL; import java.util.ArrayList; import java.util.List; @@ -43,6 +45,25 @@ public static boolean debug = false; /** + * Reads input from a URL and 
stores it in a string (only recommended for small files). + * @param file URL of a file. + * @return Contents of the file. + * @throws IOException URL not accessible or content cannot be read for some reason. + */ + public static String readFile(URL file) throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(file.openStream())); + + StringBuffer input = new StringBuffer(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + input.append(inputLine + "\n"); + } + in.close(); + + return input.toString(); + } + + /** * Reads in a file. * * @param file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |