From: <lor...@us...> - 2011-12-12 14:38:00
Revision: 3499
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3499&view=rev
Author:   lorenz_b
Date:     2011-12-12 14:37:51 +0000 (Mon, 12 Dec 2011)

Log Message:
-----------
Removed 2 unused classes.

Removed Paths:
-------------
    trunk/components-core/src/main/java/org/dllearner/utilities/ICFinder.java
    trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java

Deleted: trunk/components-core/src/main/java/org/dllearner/utilities/ICFinder.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/utilities/ICFinder.java	2011-12-12 14:36:59 UTC (rev 3498)
+++ trunk/components-core/src/main/java/org/dllearner/utilities/ICFinder.java	2011-12-12 14:37:51 UTC (rev 3499)
@@ -1,189 +0,0 @@
-package org.dllearner.utilities;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.Vector;
-
-/**
- *
- * @author Uthaya
- *
- */
-// n + v only
-// David Hope, 2008, University Of Sussex
-
-public class ICFinder
-{
-    private String[] editor = null;
-    private String icfilename = "";
-    private BufferedReader in = null;
-    private String line = "";
-// look up
-    private Hashtable<String, Double> lookup = null; // quick look up for synset counts (we require Double as Resnik counts are doubles)
-// counts for nouns and verbs
-    private double nouns_sum = 0.0;
-    private double verbs_sum = 0.0;
-    private double nounsandverbs_sum = 0.0; // ** the ??? normaliser ??? ** for the 'getProbability' method
-// <ROOTS> for nouns and verbs
-    private double nounroot_sum = 0.0;
-    private double verbroot_sum = 0.0;
-    private ArrayList<String> nounroots = null;
-    private ArrayList<String> verbroots = null;
-
-    public ICFinder(String icfilename)
-    {
-        System.out.println("... calculating IC <roots> ...");
-        System.out.println("... ICFinder");
-
-// your IC file
-        this.icfilename = icfilename;
-// quick look up table
-        lookup = new Hashtable<String, Double>();
-// get some useful 'constants'
-        nounroots = new ArrayList<String>();
-        verbroots = new ArrayList<String>();
-        Vector<Double> constants = setup();
-        nouns_sum = constants.get(0);
-        verbs_sum = constants.get(1);
-        nounsandverbs_sum = ( nouns_sum + verbs_sum );
-        nounroot_sum = constants.get(2);
-        verbroot_sum = constants.get(3);
-    }
-
-    public double getRootSum(String pos)
-    {
-        if(pos.equalsIgnoreCase("v"))
-            return (verbroot_sum);
-        return (nounroot_sum);
-    }
-
-
-// 'getFrequency': get the count for the {synset} from the IC file
-    private double getFrequency(String synset, String pos)
-    {
-        if(lookup.containsKey(synset + pos))
-            return ( lookup.get(synset + pos) );
-        return ( 0.0 );
-    }
-
-// 'getProbability': get the probability of the {synset}
-    private double getProbability(String synset, String pos)
-    {
-        double freq = getFrequency(synset, pos);
-        if(freq == 0.0)
-            return ( 0.0 );
-
-        double probability = 0.0;
-
-        if(pos.equalsIgnoreCase("n"))
-            probability = ( freq / nounroot_sum ); // Ted Pedersen et al. use the sum of the noun<root> counts *not* the sum of the noun counts
-
-        if(pos.equalsIgnoreCase("v"))
-            probability = ( freq / verbroot_sum ); // Ted Pedersen et al. use the sum of the verb<root> counts *not* the sum of the verb counts
-
-        return ( probability );
-    }
-
-
-// does all / any type of synset i.e. standard synset | <lcs> synset
-// !!! we are using the notion of a 'fake'<root> as per the Perl implementation !!!
-// !!! there is no option to turn the 'fake'<root> off in this implementation - it all gets a bit silly (hard to justify) if we do this !!!
-    public double getIC(String synset, String pos)
-    {
-        double ic = 0.0;
-// Case 1. There is *no* <lcs> ......................................................................................
-// If the 'synset' is empty (null Object or an empty String), - this implies that no <lcs>|synset was found for a (pair of synsets) and thus,
-// they must join at an 'imaginary' <root> point in the WordNet space (tree). We call this the 'fake'<root>.
-// Further, *if* we are assuming a 'fake'<root> (which we do; we default to it as per the Perl implementation), - this implies
-// that it subsumes all other <roots>. This being the case, the 'fake'<root> must then have an Information Content (ic) value of 0
-// as it provides us with zero information
-        if(synset == null || synset.length() == 0)
-        {
-            return ( ic );
-        }
-// ..................................................................................................................
-// Case 2. There is an <lcs> but it has a frequency of zero and thus it has a probability of zero and thus is just not valid as input
-// to the Information Content equation ( we will get 'Infinity') - so, we simply return 0
-        double p = getProbability(synset, pos);
-        if(p == 0.0)
-        {
-            return ( ic );
-        }
-        else
-        {
-            ic = -Math.log(p);
-        }
-// ..................................................................................................................
-// Case 3. There is an <lcs>, -- it may be a <root> or it may be a boring old synset but - it does have a frequency, thus it does have
-// a probability and thus we may calculate the Information Content for this synset. If the synset is a <root> and there is only 1 such
-// <root> for the POS, then, effectively the Information Content will be zero, otherwise we should get a value that is greater than zero
-        return ( ic );
-    }
-
-// utility: get counts for {synsets} | just nouns | just verbs | noun'fake'<root> | verb'fake'<root>
-// these are used to calculate probabilities of {synsets} and to 'back-off' to a <root> value if no LCS exists for 2 words
-    private Vector<Double> setup()
-    {
-        String unit = "";
-        double uc = 0.0;
-        double nc = 0.0;
-        double vc = 0.0;
-        double nrc = 0.0;
-        double vrc = 0.0;
-        Vector<Double> counts = new Vector<Double>();
-        try
-        {
-            in = new BufferedReader(new FileReader(icfilename));
-            while ((line = in.readLine()) != null)
-            {
-                editor = line.split("\\s"); // IC files are space delimited
-                for(int i = 0; i < editor.length; i++)
-                {
-                    unit = editor[i];
-// nouns
-                    if(unit.endsWith("n"))
-                    {
-                        lookup.put(editor[0], Double.parseDouble(editor[1]));
-                        uc = Double.parseDouble(editor[1]); // get the value: the 'count' for the {synset}
-                        nc += uc; // add to noun total
-                        if(editor.length == 3) // if ROOT
-                        {
-                            nrc += uc; // add to noun<root> total
-                            // store noun<root>
-                            nounroots.add(editor[0].substring(0, editor[0].length()-1));
-                        }
-                    }else if(unit.endsWith("v")) // verbs
-                    {
-                        lookup.put(editor[0], Double.parseDouble(editor[1]));
-                        uc = Double.parseDouble(editor[1]); // get the value: the 'count' for the {synset}
-                        vc += uc; // add to verb total
-                        if(editor.length == 3) // if ROOT
-                        {
-                            vrc += uc; // add to verb<root> total
-                            // store verb<root>
-                            verbroots.add(editor[0].substring(0, editor[0].length()-1));
-                        }
-                    }/*else{
-                        System.err.println("Adj? "+ unit);
-                    }*/
-                }
-            }
-            in.close();
-        }
-        catch (IOException e){e.printStackTrace();}
-        counts.add(nc); counts.add(vc); counts.add(nrc); counts.add(vrc);
-        return ( counts );
-    }
-
-    public ArrayList<String> getNounRoots()
-    {
-        return ( nounroots );
-    }
-    public ArrayList<String> getVerbRoots()
-    {
-        return ( verbroots );
-    }
-}

Deleted: trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java	2011-12-12 14:36:59 UTC (rev 3498)
+++ trunk/components-core/src/main/java/org/dllearner/utilities/WordnetSimilarity.java	2011-12-12 14:37:51 UTC (rev 3499)
@@ -1,287 +0,0 @@
-package org.dllearner.utilities;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import net.didion.jwnl.JWNL;
-import net.didion.jwnl.JWNLException;
-import net.didion.jwnl.data.IndexWord;
-import net.didion.jwnl.data.POS;
-import net.didion.jwnl.data.PointerTarget;
-import net.didion.jwnl.data.PointerType;
-import net.didion.jwnl.data.Synset;
-import net.didion.jwnl.data.Word;
-import net.didion.jwnl.dictionary.Dictionary;
-
-public class WordnetSimilarity {
-
-    public Dictionary dict;
-
-    public WordnetSimilarity(){
-        try {
-            JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml"));
-            dict = Dictionary.getInstance();
-        } catch (JWNLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    public double computeSimilarity(String s1, String s2, POS pos){
-        List<String> synonyms = new ArrayList<String>();
-
-        try {
-            IndexWord iw1 = dict.getIndexWord(pos, s1);
-            IndexWord iw2 = dict.getIndexWord(pos, s2); //dict.getMorphologicalProcessor().lookupBaseForm(pos, s)
-//            IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s);
-            getUpwardHierachy(s1, pos);
-            getUpwardHierachy(s2, pos);
-
-            ICFinder icFinder = new ICFinder("src/main/resources/ic-semcor.dat");
-            Synset synset1 = iw1.getSenses()[0];
-            Synset synset2 = iw2.getSenses()[0];
-            Synset lcs = getLCS(synset1, synset2, "NN", icFinder);
-            System.out.println(lcs);
-
-            for(Synset synset : iw1.getSenses()){
-                for(List<PointerTarget> tree : getHypernymTrees(synset, new HashSet<PointerTarget>())){
-                    for(PointerTarget t : tree){
-                        System.out.print(((Synset)t).getWords()[0].getLemma() + "-->");
-                    }
-                    System.out.println();
-                }
-            }
-
-        } catch (JWNLException e) {
-            e.printStackTrace();
-        }
-
-        return -1;
-    }
-
-    private List<PointerTarget> getUpwardHierachy(PointerTarget target){
-        List<PointerTarget> hierarchy = new ArrayList<PointerTarget>();
-        try {
-            PointerTarget[] targets = target.getTargets(PointerType.HYPERNYM);
-            for (PointerTarget t : targets) {
-                hierarchy.add(t);
-                hierarchy.addAll(getUpwardHierachy(t));
-            }
-        } catch (JWNLException e) {
-            e.printStackTrace();
-        }
-        return hierarchy;
-    }
-
-//    private List<List<PointerTarget>> getUpwardHierachies(List<List<PointerTarget>> targets){
-//        List<List<PointerTarget>> hierarchies = new ArrayList<List<PointerTarget>>();
-//        try {
-//            PointerTarget[] targets = target.getTargets(PointerType.HYPERNYM);
-//            for (PointerTarget t : targets) {
-//                hierarchy.add(t);
-//                hierarchy.addAll(getUpwardHierachy(t));
-//            }
-//        } catch (JWNLException e) {
-//            e.printStackTrace();
-//        }
-//        return hierarchy;
-//
-//    }
-
-    private void getUpwardHierachy(String word, POS pos){
-        try {
-            IndexWord iw = dict.getIndexWord(pos, word);
-            for(Synset synset : iw.getSenses()){
-                for(PointerTarget t : getUpwardHierachy(synset)){
-                    System.out.print(((Synset)t).getWord(0).getLemma() + "-->");
-                }
-                System.out.println();
-            }
-        } catch (JWNLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    private void getHypernyms(IndexWord iw){
-        try {
-            if(iw != null){
-                Synset[] synsets = iw.getSenses();
-                for(Synset s : synsets){
-                    System.out.println(s);
-                    PointerTarget[] targets = s.getTargets(PointerType.HYPERNYM);
-                    for (PointerTarget target : targets) {
-                        Word[] words = ((Synset) target).getWords();
-                        for (Word word : words) {
-                            System.out.println(word);
-                        }
-                    }
-                }
-            }
-        } catch (JWNLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    public Synset getLCS(Synset synset1, Synset synset2, String pos, ICFinder icFinder) throws JWNLException
-    {
-        // synset1
-        HashSet<Synset> s1 = new HashSet<Synset>(); s1.add(synset1);
-        HashSet<Synset> h1 = new HashSet<Synset>();
-        h1 = getHypernyms(s1, h1);
-        // !!! important !!! we must add the original {synset} back in, as the 2 {synsets} (senses) we are comparing may be equivalent i.e. the same {synset}!
-        h1.add(synset1);
-        //System.out.println(">>>>>>>>>>>>>>>>>>>>>");
-        // synset2
-        HashSet<Synset> s2 = new HashSet<Synset>(); s2.add(synset2);
-        HashSet<Synset> h2 = new HashSet<Synset>();
-        h2 = getHypernyms(s2, h2);
-        h2.add(synset2); // ??? don't really need this ???
-        //System.out.println("JWNL,h1, "+toStr(synset1.getWords())+", :h2, "+toStr(synset2.getWords())+" ,=, "+h1.size()+", "+h2.size());
-        // get the candidate <lcs>s i.e. the intersection of all <hypernyms> | {synsets} which subsume the 2 {synsets}
-        /*System.out.println("========================");
-        System.out.println(h1);
-        System.out.println(h2);
-        System.out.println("========================");*/
-        h1.retainAll(h2);
-        if(h1.isEmpty())
-        {
-            return (null); // i.e. there is *no* <LCS> for the 2 synsets
-        }
-
-        // get *a* <lcs> with the highest Information Content
-        double max = -Double.MAX_VALUE;
-        Synset maxlcs = null;
-        for (Synset h : h1)
-        {
-            double ic = icFinder.getIC("" + h.getOffset(), pos); // use ICFinder to get the Information Content value
-            if(ic > max)
-            {
-                max = ic;
-                maxlcs = h;
-            }
-        }
-        return maxlcs; // return the {synset} with *a* highest IC value
-    }
-
-
-    // 1.1 GET <HYPERNYMS>
-    private HashSet<Synset> getHypernyms(HashSet<Synset> synsets, HashSet<Synset> allhypernms) throws JWNLException
-    {
-        if(allhypernms.size() >= 100){
-            return allhypernms;
-        }
-
-        //System.out.println("IP: " + synsets);
-        HashSet<Synset> hypernyms = new HashSet<Synset>();
-        for(Synset s : synsets)
-        {
-            PointerTarget[] hyp = s.getTargets(PointerType.HYPERNYM); // get the <hypernyms> if there are any
-            for (PointerTarget pointerTarget : hyp) {
-                if (pointerTarget instanceof Synset) {
-                    Synset poiSyn = (Synset) pointerTarget;
-                    hypernyms.add(poiSyn);
-                }/*else{
-                    //System.out.println("PointerTarget is not instanceof Synset: "+pointerTarget);
-                }*/
-            }
-            //System.out.println("\t"+hypernyms);
-        }
-        if(!hypernyms.isEmpty())
-        {
-            if(allhypernms.size()+hypernyms.size() >= 100){
-                return allhypernms;
-            }
-            try {
-                allhypernms.addAll(hypernyms);
-            } catch (StackOverflowError e) {
-                //System.out.println(allhypernms.size());
-                //System.out.println(hypernyms.size());
-                //e.printStackTrace();
-                System.gc();
-                System.gc();
-                System.err.println(e.getMessage());
-                return allhypernms;
-            }
-            allhypernms = getHypernyms(hypernyms, allhypernms);
-        }
-        //System.out.println(allhypernms);
-        return allhypernms;
-    }
-
-    /**
-     * since this method is heavily used, inner cache would help for e.g.
-     * calculating similarity matrix
-     *
-     * Subroutine that returns an array of hypernym trees, given the offset of
-     * the synset. Each hypernym tree is an array of offsets.
-     *
-     * @param synset
-     * @param mode
-     */
-    public List<List<PointerTarget>> getHypernymTrees(PointerTarget synset, Set<PointerTarget> history) {
-        PointerTarget key = synset;
-
-        // check if the input synset is one of the imaginary root nodes
-        if (synset.equals(new Synset(POS.NOUN, 0, new Word[]{new Word("ROOT", "ROOT", 0)}, null, null, null))) {
-            List<PointerTarget> tree = new ArrayList<PointerTarget>();
-            tree.add(new Synset(POS.NOUN, 0, new Word[]{new Word("ROOT", "ROOT", 0)}, null, null, null));
-            List<List<PointerTarget>> trees = new ArrayList<List<PointerTarget>>();
-            trees.add(tree);
-            return trees;
-        }
-
-        List<PointerTarget> synlinks = null;
-        try {
-            synlinks = Arrays.asList(synset.getTargets(PointerType.HYPERNYM));
-        } catch (JWNLException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
-        }
-
-        List<List<PointerTarget>> returnList = new ArrayList<List<PointerTarget>>();
-        if (synlinks.size() == 0) {
-            List<PointerTarget> tree = new ArrayList<PointerTarget>();
-            tree.add(synset);
-            tree.add(0, new Synset(POS.NOUN, 0, new Word[]{new Word("ROOT", "ROOT", 0)}, null, null, null));
-            returnList.add(tree);
-        } else {
-            for (PointerTarget hypernym : synlinks) {
-                if ( history.contains(hypernym) ) continue;
-                history.add(hypernym);
-
-                List<List<PointerTarget>> hypernymTrees = getHypernymTrees(hypernym, history);
-                if ( hypernymTrees != null ) {
-                    for (List<PointerTarget> hypernymTree : hypernymTrees) {
-                        hypernymTree.add(synset);
-                        returnList.add(hypernymTree);
-                    }
-                }
-                if (returnList.size() == 0) {
-                    List<PointerTarget> newList = new ArrayList<PointerTarget>();
-                    newList.add(synset);
-                    newList.add(0, new Synset(POS.NOUN, 0, new Word[]{new Word("ROOT", "ROOT", 0)}, null, null, null));
-                    returnList.add(newList);
-                }
-            }
-        }
-
-        return returnList;
-    }
-
-
-    public static void main(String[] args) {
-        System.out.println(new WordnetSimilarity().computeSimilarity("writer", "teacher", POS.NOUN));
-
-//        ILexicalDatabase db = new NictWordNet();
-//        System.out.println(new Lin(db).calcRelatednessOfWords("writer", "teacher"));
-    }
-
-}