From: <sk...@us...> - 2008-08-15 12:03:46
|
Revision: 1083 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1083&view=rev Author: sknappe Date: 2008-08-15 12:03:42 +0000 (Fri, 15 Aug 2008) Log Message: ----------- Added CalculatePageRank to navigator. Categories are now also put into the database. Search now works with the tag cloud (when choosing a tag you see all articles that are in that category); you can choose how many search results you want to have (at the moment 10, 25, 50, 75 or 100), and search results are shown on pages with at most 25 search results each. Modified Paths: -------------- trunk/src/dbpedia-navigator/ajax_search.php trunk/src/dbpedia-navigator/helper_functions.php trunk/src/dbpedia-navigator/index-new.php Added Paths: ----------- trunk/src/dbpedia-navigator/CalculatePageRank.java Copied: trunk/src/dbpedia-navigator/CalculatePageRank.java (from rev 1063, trunk/src/dl-learner/org/dllearner/test/CalculatePageRank.java) =================================================================== --- trunk/src/dbpedia-navigator/CalculatePageRank.java (rev 0) +++ trunk/src/dbpedia-navigator/CalculatePageRank.java 2008-08-15 12:03:42 UTC (rev 1083) @@ -0,0 +1,185 @@ +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; + +public class CalculatePageRank { + + private final String wikilinks="../pagelinks_en.nt"; + private final String labels="../articles_label_en.nt"; + private final String categories="../yago_en.nt"; + + private void calculateLinks() + { + try{ + Statement stmt; + ResultSet rs; + int number; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(wikilinks)); + + String line; + 
String[] split; + String name; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(" "); + name=split[2].substring(1, split[2].length()-1); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + number=rs.getInt(1); + number++; + stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'"); + } + else{ + stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)"); + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void addLabels() + { + try{ + Statement stmt; + ResultSet rs; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(labels)); + + String line; + String[] split; + String name; + String label; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(">"); + name=split[0].substring(1); + label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); + rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'"); + if (rs.next()){ + stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'"); + } + else{ + stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")"); + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + private void calculateCategories() + { + try{ + Statement 
stmt; + + Class.forName("com.mysql.jdbc.Driver"); + + String url = + "jdbc:mysql://localhost:3306/navigator_db"; + + Connection con = DriverManager.getConnection( + url,"navigator", "dbpedia"); + + stmt = con.createStatement(); + BufferedReader in = new BufferedReader(new FileReader(categories)); + + String line; + String[] split; + String name; + String label; + String pred; + int i=0; + while ((line=in.readLine())!=null) + { + split=line.split(">"); + name=split[0].substring(1); + pred=split[1].substring(2); + if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")) + label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\"")); + else + label=split[2].substring(2); + if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){ + try{ + stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")"); + }catch(Exception e) + {} + } + else{ + stmt.executeUpdate("UPDATE rank SET category=\""+label+"\" WHERE name=\""+name+"\""); + } + if (i%100000==0) System.out.println(i); + i++; + } + + in.close(); + con.close(); + } catch (FileNotFoundException e) + { + System.out.println("File not found"); + } catch (IOException e) + { + System.out.println("IOException"); + } catch (Exception e) + { + e.printStackTrace(); + } + } + + public static void main(String[] args){ + CalculatePageRank cal=new CalculatePageRank(); + //cal.calculateLinks(); + //cal.addLabels(); + cal.calculateCategories(); + } +} \ No newline at end of file Modified: trunk/src/dbpedia-navigator/ajax_search.php =================================================================== --- trunk/src/dbpedia-navigator/ajax_search.php 2008-08-15 11:43:53 UTC (rev 1082) +++ trunk/src/dbpedia-navigator/ajax_search.php 2008-08-15 12:03:42 UTC (rev 1083) @@ -3,6 +3,7 @@ $label=$_POST['label']; $list=$_POST['list']; + $number=$_POST['number']; session_start(); $id=$_SESSION['id']; $ksID=$_SESSION['ksID']; @@ -15,30 +16,38 @@ //initialise content $content=""; - 
/*try{ - require_once("DLLearnerConnection.php"); - $sc=new DLLearnerConnection($id,$ksID); - - $subjects=$sc->getSubjects($label,$checkedInstances); - - $content.=getTagCloud($subjects['tagcloud'],$subjects['tagcloudlabel']); - $content.=getResultsTable($subjects['subjects']); - } catch (Exception $e){ - $content=$e->getMessage(); - }*/ + mysql_connect('localhost','navigator','dbpedia'); mysql_select_db("navigator_db"); - $query="SELECT name, label FROM rank WHERE MATCH (label) AGAINST ('$label') ORDER BY number LIMIT 3"; + $query="SELECT name, label, category FROM rank WHERE MATCH (label) AGAINST ('$label') ORDER BY number DESC LIMIT ".$number; $res=mysql_query($query); $names=array(); $labels=array(); + $classes=array(); + $tags=array(); + $catlabels=array(); while ($result=mysql_fetch_array($res)){ $labels[]=$result['label']; $names[]=$result['name']; + if (!isset($result['category'])){ + $result['category']="NoCategory"; + $result2['label']="No Category"; + } + else + { + $query="SELECT label FROM categories WHERE category='".$result['category']."' LIMIT 1"; + $res2=mysql_query($query); + $result2=mysql_fetch_array($res2); + } + $classes[]=$result['category']; + if (!isset($tags[$result['category']])) $tags[$result['category']]=1; + else $tags[$result['category']]++; + if (!isset($catlabels[$result['category']])) $catlabels[$result['category']]=$result2['label']; } - $content.=getResultsTable($names,$labels); + $content.=getTagCloud($tags,$catlabels); + $content.=getResultsTable($names,$labels,$classes,$number); print $content; print '$$'; - print "Searchresult for ".$label; + print "Searchresult for \"".$label."\""; ?> \ No newline at end of file Modified: trunk/src/dbpedia-navigator/helper_functions.php =================================================================== --- trunk/src/dbpedia-navigator/helper_functions.php 2008-08-15 11:43:53 UTC (rev 1082) +++ trunk/src/dbpedia-navigator/helper_functions.php 2008-08-15 12:03:42 UTC (rev 1083) @@ -18,12 
+18,20 @@ function getTagCloud($tags,$label) { + if (isset($tags['NoCategory'])){ + $nc=true; + unset($tags['NoCategory']); + } + else $nc=false; + $max=max($tags); $min=min($tags); $diff=$max-$min; $distribution=$diff/3; $ret="<p>"; + $ret.='<a style="font-size:xx-large;" href="#" onclick="document.getElementById(\'hidden_class\').value=\'all\';show_results(\'all\',document.getElementById(\'hidden_number\').value);">All</a> '; + if ($nc) $ret.='<a style="font-size:xx-small;" href="#" onclick="document.getElementById(\'hidden_class\').value=\'NoCategory\';show_results(\'NoCategory\',document.getElementById(\'hidden_number\').value);">No Category</a> '; foreach ($tags as $tag=>$count){ if ($count==$min) $style="font-size:xx-small;"; else if ($count==$max) $style="font-size:xx-large;"; @@ -31,40 +39,50 @@ else if ($count>($min+$distribution)) $style="font-size:medium;"; else $style="font-size:small;"; - $tag_with_entities=htmlentities("\"".$tag."\""); - $ret.='<a style="'.$style.'" href="#" onclick="xajax_getSubjectsFromConcept(\''.$tag_with_entities.'\');">'.$label[$tag].'</a>'; + //$tag_with_entities=htmlentities("\"".$tag."\""); + $ret.='<a style="'.$style.'" href="#" onclick="document.getElementById(\'hidden_class\').value=\''.$tag.'\';show_results(\''.$tag.'\',document.getElementById(\'hidden_number\').value);">'.$label[$tag].'</a> '; } - $ret.="</p>"; + $ret.="</p><br/>"; return $ret; } -function getResultsTable($names,$labels) +function getResultsTable($names,$labels,$classes,$number) { - $ret="<p>Your search brought ".count($names)." results.</p><br/>"; + $ret="<p>These are your Searchresults. 
Show best "; + for ($k=10;$k<125;){ + $ret.="<a href=\"#\" onclick=\"var list=tree.getAllChecked();search_it('label='+document.getElementById('label').value+'&list='+list+'&number=".$k."');return false;\""; + if ($k==$number) $ret.=" style=\"text-decoration:none;\""; + else $ret.=" style=\"text-decoration:underline;\""; + $ret.=">".($k)."</a>"; + if ($k!=100) $ret.=" | "; + if($k==10) $k=25; + else $k=$k+25; + } + $ret.="</p><br/>"; $i=0; $display="block"; - while($i*30<count($names)) + $ret.="<div id=\"results\">"; + while($i*25<count($names)) { - $ret.="<div id='results".$i."' style='display:".$display."'>Seite ".($i+1)."<br/><br/>"; - for ($j=0;($j<30)&&(($i*30+$j)<count($names));$j++) + for ($j=0;($j<25)&&(($i*25+$j)<count($names));$j++) { - $name=$names[$i*30+$j]; - $label=$labels[$i*30+$j]; - $ret.=' <a href="" onclick="get_article(\'label='.$name.'&cache=-1\');return false;">'.$label.'</a><br/>'; + $name=$names[$i*25+$j]; + $label=$labels[$i*25+$j]; + $class=$classes[$i*25+$j]; + $ret.='<p style="display:'.$display.'"> '.($i*25+$j+1).'. 
<a href="" class="'.$class.'" onclick="get_article(\'label='.$name.'&cache=-1\');return false;">'.$label.'</a></p>'; } - $ret.="</div>"; $i++; $display="none"; } - $ret.="<br/><p style='width:100%;text-align:center;'>"; + $ret.='<input type="hidden" id="hidden_class" value="all"/><input type="hidden" id="hidden_number" value="0"/></div><br/><p style="width:100%;text-align:center;" id="sitenumbers">'; for ($k=0;$k<$i;$k++){ - $ret.="<a href=\"\" onClick=\"showdiv('results".($k)."');"; - for ($l=0;$l<$i;$l++) - { - if ($l!=$k) $ret.="hidediv('results".$l."');"; - } - $ret.="return false;\">".($k+1)."</a>"; - if ($k!=($i-1)) $ret.=" | "; + $ret.="<span>"; + if ($k!=0) $ret.=" | "; + $ret.="<a href=\"#\" onclick=\"document.getElementById('hidden_number').value='".(25*$k)."';show_results(document.getElementById('hidden_class').value,".(25*$k).");\""; + if ($k==0) $ret.=" style=\"text-decoration:none;\""; + else $ret.=" style=\"text-decoration:underline;\""; + $ret.=">".($k+1)."</a>"; + $ret.="</span>"; } $ret.="</p>"; return $ret; Modified: trunk/src/dbpedia-navigator/index-new.php =================================================================== --- trunk/src/dbpedia-navigator/index-new.php 2008-08-15 11:43:53 UTC (rev 1082) +++ trunk/src/dbpedia-navigator/index-new.php 2008-08-15 12:03:42 UTC (rev 1083) @@ -75,7 +75,7 @@ } } - function get_article(param) + function search_it(param) { if (document.all){ //IE @@ -86,7 +86,7 @@ var XhrObj = new XMLHttpRequest(); } - XhrObj.open("POST",'ajax_get_article.php'); + XhrObj.open("POST",'ajax_search.php'); XhrObj.onreadystatechange = function() { @@ -94,9 +94,6 @@ var response = XhrObj.responseText.split('$$'); document.getElementById('articlecontent').innerHTML=response[0]; document.getElementById('ArticleTitle').innerHTML=response[1]; - document.getElementById('lastarticles').innerHTML=response[2]; - document.getElementById('Positives').innerHTML=response[3]; - document.getElementById('Negatives').innerHTML=response[4]; } } 
@@ -104,7 +101,7 @@ XhrObj.send(param); } - function search(param) + function get_article(param) { if (document.all){ //IE @@ -115,7 +112,7 @@ var XhrObj = new XMLHttpRequest(); } - XhrObj.open("POST",'ajax_search.php'); + XhrObj.open("POST",'ajax_get_article.php'); XhrObj.onreadystatechange = function() { @@ -123,12 +120,43 @@ var response = XhrObj.responseText.split('$$'); document.getElementById('articlecontent').innerHTML=response[0]; document.getElementById('ArticleTitle').innerHTML=response[1]; + document.getElementById('lastarticles').innerHTML=response[2]; + document.getElementById('Positives').innerHTML=response[3]; + document.getElementById('Negatives').innerHTML=response[4]; } } XhrObj.setRequestHeader('Content-Type','application/x-www-form-urlencoded'); XhrObj.send(param); } + + function show_results(class, number) + { + var links=document.getElementById('results').getElementsByTagName('p'); + var j=0; + for (var i=0;i<links.length;i++){ + if (links[i].getElementsByTagName('a')[0].className==class||class=='all'){ + if ((j+1)>number&&j<(number+25)) links[i].style.display='block'; + else links[i].style.display='none'; + j++; + } + else links[i].style.display='none'; + } + if (j<number){ + show_results(class,0); + return; + } + + var sitenumbers=document.getElementById('sitenumbers').getElementsByTagName('span'); + for (var i=0;i<sitenumbers.length;i++){ + if ((parseInt(sitenumbers[i].getElementsByTagName('a')[0].innerHTML)-1)*25==number) sitenumbers[i].getElementsByTagName('a')[0].style.textDecoration='none'; + else sitenumbers[i].getElementsByTagName('a')[0].style.textDecoration='underline'; + if ((parseInt(sitenumbers[i].getElementsByTagName('a')[0].innerHTML)-1)*25>=j) + sitenumbers[i].style.display='none'; + else + sitenumbers[i].style.display='inline'; + } + } </script> </head> <body> @@ -148,7 +176,7 @@ <!-- Search:<br/> --> <form onSubmit="get_article('label='+document.getElementById('label').value+'&cache=-1');return false;"> <input type="text" 
name="label" id="label" /><br/> - <input type="button" value="Article" class="button" onclick="get_article('label='+document.getElementById('label').value+'&cache=-1');return false;" /> <input type="button" value="Search" class="button" onclick="var list=tree.getAllChecked();search('label='+document.getElementById('label').value+'&list='+list);return false;" /> + <input type="button" value="Article" class="button" onclick="get_article('label='+document.getElementById('label').value+'&cache=-1');return false;" /> <input type="button" value="Search" class="button" onclick="var list=tree.getAllChecked();search_it('label='+document.getElementById('label').value+'&list='+list+'&number=10');return false;" /> <!-- <input type="button" value="Fulltext" class="button" onclick=""/> --> </form> </div> <!-- boxcontent --> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |