From: Martin F. <mar...@us...> - 2007-12-26 16:01:46
|
Update of /cvsroot/arianne/stendhal/src/games/stendhal/server/entity/npc/newparser In directory sc8-pr-cvs11.sourceforge.net:/tmp/cvs-serv9838/src/games/stendhal/server/entity/npc/newparser Added Files: WordList.java WordEntry.java words.txt Log Message: list based word categorisation to be used in the new conversation parser --- NEW FILE: WordEntry.java --- package games.stendhal.server.entity.npc.newparser; import java.io.PrintWriter; /** * Word list entry, used to categorize words * Nouns and verbs can be associated with their plural form. * * @author Martin Fuchs */ public class WordEntry { public String word; /** word */ public String type; /** word type, e.g. VER, ADJ, PLU, PLU-ANI, ... */ public String plural; /** pluralised word (or singular for entries of type ...-PLU */ public Integer value; /** numeric value for words of type NUM */ public void print(PrintWriter pw) { pw.printf("%s\t", word); if (type != null) { pw.print(type); } if (plural != null) { pw.printf("\t%s", plural); } if (value != null) { pw.printf("\t%d", value); } } } --- NEW FILE: WordList.java --- package games.stendhal.server.entity.npc.newparser; import games.stendhal.common.Grammar; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.StringTokenizer; import java.util.TreeMap; import marauroa.common.Log4J; import org.apache.log4j.Logger; /** * Word list manager * Words are categorized by type (noun, verb, adjective, preposition) * and optionally sub-types (animals, food, fluids, ...). * * @author Martin Fuchs */ public class WordList { private static Logger logger = Logger.getLogger(WordList.class); Map<String, WordEntry> words = new TreeMap<String, WordEntry>(); List<String> comments = new ArrayList<String>(); private static WordList instance = new WordList(); // initialise word list from the input file "words.txt" static { Log4J.init(); InputStream str = WordList.class.getResourceAsStream("words.txt"); BufferedReader reader = new BufferedReader(new InputStreamReader(str)); instance.read(reader); try { reader.close(); } catch(IOException e) { e.printStackTrace(); } } public static WordList getInstance() { return instance; } public void read(BufferedReader reader) { try { for(;;) { String line = reader.readLine(); if (line == null) break; StringTokenizer tk = new StringTokenizer(line); if (!tk.hasMoreTokens()) continue; String key = tk.nextToken(); if (key.startsWith("#")) { comments.add(line); } else { WordEntry entry = new WordEntry(); entry.word = key; if (tk.hasMoreTokens()) { entry.type = tk.nextToken(); if (tk.hasMoreTokens()) { String s = tk.nextToken(); if (entry.type.startsWith("NUM")) entry.value = new Integer(s); else entry.plural = s; } if (Character.isLowerCase(entry.type.charAt(0))) { entry.plural = entry.type; entry.type = "NOU"; } else if (entry.plural==null && entry.type.startsWith("NOU") && !entry.type.endsWith("NAM")) { String plural = Grammar.plural(key); // only store single word plurals if (plural.indexOf(' ') == -1) entry.plural = plural; } else if (entry.plural != null){ String plural = Grammar.plural(key); if (plural.indexOf(' ')==-1 && !plural.equals(entry.plural) && !Grammar.isSubject(entry.word) && !entry.word.equals("is")) { logger.error(String.format("suspicious plural: %s -> %s (%s?)", entry.word, entry.plural, plural)); } } while(tk.hasMoreTokens()) { logger.error("superflous trailing word: " + tk.nextToken()); } } words.put(key.toLowerCase(), entry); // store plural and associate with singular form if (entry.plural!=null && !entry.plural.equals(entry.word)) { WordEntry pluralEntry = new WordEntry(); pluralEntry.word = entry.plural; pluralEntry.type = entry.type + "-PLU"; pluralEntry.plural = entry.word; pluralEntry.value = entry.value; WordEntry prev = words.put(entry.plural.toLowerCase(), pluralEntry); if (prev != null) { logger.debug(String.format("ambiguos plural: %s/%s -> %s", pluralEntry.plural, prev.plural, entry.plural)); pluralEntry.plural = null; prev.plural = null; } } } } } catch(IOException e) { e.printStackTrace(); } } /** * find an entry for a given word * @param s * @return Word */ public WordEntry find(String s) { WordEntry w = words.get(s.toLowerCase()); return w; } /** * print all words sorted by known types * @param writer */ public void write(PrintWriter writer) { for(String c : comments) { writer.println(c); } writer.println(); printWordType(writer, "VER"); writer.println(); printWordType(writer, "NOU"); writer.println(); printWordType(writer, "ADJ"); writer.println(); printWordType(writer, "NUM"); writer.println(); printWordType(writer, "PRE"); writer.println(); printWordType(writer, "IGN"); writer.println(); printWordType(writer, null); } /** * print all words of a given (main-)type * * @param writer * @param type */ private void printWordType(PrintWriter writer, String type) { for(String word : words.keySet()) { WordEntry w = words.get(word); boolean matches; if (type == null) { matches = w.type==null; } else { matches = w.type!=null && w.type.startsWith(type) && !w.type.endsWith("-PLU"); } if (matches) { w.print(writer); writer.println(); } } } /** * main() function for WordList to read word list * and print out in a sorted, formated way * * @param args */ public static void main(String[] args) { PrintWriter writer = new PrintWriter(System.out); instance.write(writer); writer.close(); } } --- NEW FILE: words.txt --- # # Word list for the Stendhal conversation parser # @author Martin Fuchs # # The list entries are in the following format: # WORD TYPE [PLURAL/VALUE] # # WORD is the word in lower case. # PLURAL is the pluralised form of the word for nouns and verbs. # VALUE is used to specify the numeric equivalent of numeric expressions. # # TYPE is one of the following constants: # # VER verb # # NOU noun # NOU-AMO noun specifying amount # NOU-ANI noun, animal # NOU-FOO noun, food # NOU-FLU noun, fluid # NOU-PER person # NOU-PER-NAM person name # # ADJ adjective/adverb # ADJ-COL color expressions # # NUM numeral # PRE preposition # IGN word to ignore # buy VER close VER drop VER give VER go VER hold VER is VER are look VER open VER say VER speak VER summon VER alga NOU algae analysis NOU analyses antidote NOU antidotes apple NOU apples arandula NOU archer NOU-PER archers archmage NOU-PER archmages archrat NOU-ANI archrats armor NOU armors arrow NOU arrows arundula NOU arundulas axe NOU axes baby NOU-PER babies bag NOU bags bar NOU bars bardiche NOU bardiches bat NOU bats battle NOU battles bear NOU bears beer NOU-FLU beholder NOU beholders boar NOU boars bob NOU-PER-NAM body NOU bodies book NOU books boot NOU boots boss NOU-PER bosses bottle NOU bottles bow NOU bows box NOU boxes boy NOU-PER boys bread NOU broadsword NOU broadswords bronze NOU bronzes buckler NOU bucklers bureau NOU bureaux bush NOU bushes button NOU buttons caboose NOU cabooses carrot NOU-FOO carrots cat NOU-ANI cats caverat NOU-ANI caverats chain NOU chains cheese NOU-FOO cheeses cherry NOU-FOO cherries chicken NOU-FOO chickens chief NOU-PER chiefs child NOU-PER children chunk NOU chunks claymore NOU claymores cloak NOU cloaks club NOU clubs cobra NOU-ANI cobras commander NOU commanders coupon NOU coupons crab NOU crabs creature NOU creatures crossbow NOU crossbows crown NOU crowns cuirasses NOU cuirasses cyclops NOU cyclopses dagger NOU daggers death NOU deaths demon NOU-PER demons deus NOU-PER dei dice NOU dice djinni NOU-PER djinn dog NOU-ANI dogs doll NOU dolls dragon NOU dragons dress NOU dresses dwarf NOU dwarves earth NOU earths eater NOU eaters efreeti NOU-PER efreet elemental NOU elementals elephant NOU-ANI elephants elf NOU-PER elves erinys NOU erinyes eupepsia NOU eupepsia fire NOU fires flail NOU flails flask NOU flasks flour NOU flours foot NOU feet gargoyle NOU gargoyles ghost NOU-PER ghosts giantrat NOU-ANI giantrats gnome NOU-PER gnomes goblin NOU-PER goblins golem NOU-PER golems goose NOU-ANI geese grain NOU-FOO grains guardian NOU-PER guardians halberd NOU halberds ham NOU hams hammer NOU hammers hand NOU hands hat NOU hats he NOU-PER they helium NOU helia helmet NOU helmets hero NOU heros home NOU homes house NOU houses human NOU humans hunter NOU hunters hypha NOU hyphae i NOU-PER we ice NOU ices index NOU indices iron NOU irons it NOU-OBJ they katana NOU katanas key NOU keys knife NOU knives knight NOU knights kobold NOU kobolds kymara NOU-PER-NAM larva NOU larvae leader NOU-PER leaders leg NOU legs lich NOU liches lion NOU-ANI lions loaf NOU-AMO loaves log NOU logs longbow NOU longbows lotus NOU lotuses mace NOU maces mage NOU-PER mages man NOU-PER men map NOU maps match NOU matches matrix NOU matrices meat NOU militia NOU militia money NOU money monkey NOU monkeys moose NOU moose mouse NOU mice mumak NOU mumakil mummy NOU mummies mushroom NOU-FOO mushrooms noose NOU nooses note NOU notes nugget NOU nuggets ogre NOU ogres orc NOU orcs ore NOU ores pair NOU pairs paper NOU papers penguin NOU-ANI penguins pie NOU-FOO pies piece NOU pieces plate NOU plates player NOU-PER players plural NOU plurals poison NOU-FLU poisons porcini NOU-FOO porcini potion NOU-FLU potions present NOU presents princess NOU princesses rat NOU-ANI rats ratman NOU-PER ratmen ratwoman NOU-PER ratwomen razorrat NOU-ANI razorrats robin NOU robins rod NOU rods sacerdotist NOU sacerdotists sack NOU-AMO sacks salad NOU salads sandwich NOU sandwiches scale NOU scales scimitar NOU scimitars scroll NOU scrolls scythe NOU scythes she NOU-PER they sheaf NOU-AMO sheaves shield NOU shields silver NOU silvers skeleton NOU skeletons skull NOU skulls slime NOU slimes sodium NOU sodia soldier NOU-PER soldiers sprig NOU-AMO sprigs staff NOU staffs steel NOU steels stone NOU stones suit NOU suits suits NOU suits sun NOU suns sword NOU swords teddy NOU teddies tiger NOU-ANI tigers toadstool NOU toadstools token NOU tokens tomato NOU-FOO tomatoes tooth NOU teeth troll NOU-PER trolls trophy NOU trophies trousers NOU trousers unicorn NOU-ANI unicorns vegetarian NOU-PER vegetarians venom NOU venoms venomrat NOU-ANI venomrats vertex NOU vertices veteran NOU-PER veterans viking NOU-PER vikings vortex NOU vortices war NOU wars warrior NOU-PER warriors water NOU-FLU waters wine NOU-FLU wolf NOU wolves woman NOU-PER women wood NOU woods wumpus NOU-PER wumpuses yclept NOU yclepts you NOU-PER you youngster NOU youngsters zombie NOU-PER zombies big ADJ biting ADJ black ADJ-COL blue ADJ-COL deadly ADJ elder ADJ empty ADJ golden ADJ-COL great ADJ greater ADJ green ADJ-COL leather ADJ marked ADJ minor ADJ old ADJ red ADJ-COL royal ADJ short ADJ silvery ADJ-COL small ADJ studded ADJ twoside ADJ white ADJ-COL wooden ADJ young ADJ a NUM 1 an NUM 1 eight NUM 8 eighteen NUM 18 eighty NUM 80 eleven NUM 11 fifteen NUM 15 fifty NUM 50 first NUM 1 five NUM 5 four NUM 4 fourteen NUM 14 fourty NUM 40 hundred NUM 100 million NUM 1000000 nine NUM 9 nineteen NUM 19 ninety NUM 90 no NUM 0 one NUM 1 second NUM 2 seven NUM 7 seventeen NUM 17 seventy NUM 70 six NUM 6 sixteen NUM 16 sixty NUM 60 ten NUM 10 third NUM 3 thirteen NUM 13 thirty NUM 30 thousand NUM 1000 three NUM 3 twelve NUM 12 twenty NUM 20 two NUM 2 zero NUM 0 of PRE off PRE on PRE under PRE with PRE please IGN the IGN whatever IGN |