Re: [Modeling-users] Time for a Murphys...
Status: Abandoned
Brought to you by:
sbigaret
|
From: Yannick G. <ygi...@yg...> - 2003-03-08 13:44:41
|
-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 On Saturday 08 March 2003 07:47, Sebastien Bigaret wrote: > > On Friday 07 March 2003 03:34 pm, Mario Ruggier wrote: > > > After looking at the Modeling sourceforge page so many times, the > > > initials of MORBY (approximately), for Modeling OR-Bridge for Python, > > > start to stand-out... but, this could be not such a bad unique name (in > > > lowercase, of course) for the framework. However, this also makes > > > other, cuter, names come to mind, such as Morfy (where the bridge > > > becomes a framework) that in turn brings to mind the action of > > > morphing (relational data to objects, and back), which points to > > > another possible name, Morphy, for Modeling, Object-Relational > > > Phramework for Python ;-) Which, in turn, would beg that an > > > OR model be called a MetaMorphy... Hmmn, mario > > > > I really like Morphy ! > > Guys, this seems a good idea! I like it too, it has a great resonance. > After letting my brain wandering on that while drinking coffees, I came to > Morph, Morpheus, and ten minutes later: Morphia, for something like: > ''Morphia: an Object-Relational Philter of Addiction'' --then I guess I > must admit that I have a weakness for auto-referent/recursive acronyms :). > > My .02c! I have the feeling that these wanderings may eventually find a > cool name for the fr... oops, phramework ;) How about ORCHYDE: Object-Relational Class HierarchY Definitive Ecosystem I also have this small script that we use on the OBB website for generating random but coherent acronyms. OK, it's really confusing because it always talks about "anagrams" where it means acronyms, which is a mistake, but the script was already finished when I realised... It supports fetching words from a dict server (which is really too slow) and from a WordNet database that should be extracted in ./wndict . It works best with short (3- or 4-letter) words. 
# Typical usage :
#   $ python OBBAnagramer.py updatewn
#   $ python OBBAnagramer.py generate
# Anyway, here it is :

#!/usr/bin/python
# Copyright (C) 2002 Yannick Gingras <ygi...@yg...>

# This file is part of Open Beat Box.

# Open Beat Box is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# Open Beat Box is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Open Beat Box; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Generate random but grammatically coherent acronyms for a fixed word.

The script keeps a pickled "word map" ``{letter: {word type: [words]}}`` in
WORDS_FILE.  The map is built either from a dict server (``update``) or from
the WordNet index files found in WN_DIR (``updatewn``); ``check`` prints how
many words are known per letter and type, and ``generate`` prints random
expansions of WORD following the part-of-speech patterns in ANAGRAM_TYPES.

Despite the "anagram" naming used throughout (kept so existing callers keep
working), what is generated are acronym expansions.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import re
import sys
import traceback
try:
    import thread                 # Python 2 name
except ImportError:               # Python 3 renamed the module
    import _thread as thread
from pickle import Pickler, Unpickler
import time
from random import Random
import os
import os.path

#SERVER = "dict.org"
#SERVER = "www.hyperdictionary.com"
SERVER = "localhost"
WN_DIR = "wndict"
WORDS_FILE = "obb_words.ana"
NB_THREAD = 5   # not used anywhere in this script

# word type tags, as they appear in the web1913 definitions
V = "v."
N = "n."
ADJ = "a."
ADV = "adv."

O = "o"
B = "b"

MAX_ACRONYMES = 20000

#WORD = (O, B, B)
WORD = ("b", "i", "t", "c", "h")
#LETTERS = (O, B)
LETTERS = WORD

# Patterns that were used with the three letter WORD = (O, B, B):
#
# ANAGRAM_TYPES = [ (ADV, ADJ,   N),
# #                 (  N, ADV, ADJ),
#                   (ADJ, ADJ,   N),
# #                 (  N,   V,   N),
# #                 (ADJ,   N,   N),
# #                 (ADJ,   N, ADJ) ]

# one word type per letter of WORD
ANAGRAM_TYPES = [
    (ADV, ADJ, ADJ, ADJ, N),
    (ADJ, ADV, ADJ, ADJ, N),
    (ADJ, ADJ, ADV, ADJ, N),
    (ADJ, ADJ, ADJ, ADV, N),
    (ADV, ADJ, ADV, ADJ, N),
]

# A definition is of a given type when it contains ", <tag> ".  The tag is
# escaped because it ends with a dot: unescaped, ", a. " would also match
# ", an ", ", as ", ... and flag unrelated words as adjectives.
WORD_TYPE_REGEXES = {
    tag: re.compile(", %s " % re.escape(tag)) for tag in (V, N, ADJ, ADV)
}

WN_TYPE_INDEXES = {
    V: "index.verb",
    N: "index.noun",
    ADJ: "index.adj",
    ADV: "index.adv",
}

# not read by any function below (they all build their own local map)
words = {V: [], N: [], ADV: [], ADJ: []}


# get words from a dict server
def getRawWords():
    """Fetch from SERVER the definition of every word starting with LETTERS.

    Needs the third-party ``dictclient`` module, imported here so that the
    other commands work without it.

    Returns ``{letter: {word: definition text}}``.  A definition that fails
    with IndexError, ValueError or an "Unknown code" error is skipped after
    opening a new connection; any other error is propagated.
    """
    from dictclient import Connection

    # 1st, get a list of the words
    print("Matching...")
    conn = Connection(SERVER)
    words = {}
    for letter in LETTERS:
        words[letter] = {}
        defs = conn.match("web1913", "prefix", letter)
        for cnt, definition in enumerate(defs, 1):
            try:
                detail = definition.getdefstr()
                word = definition.getword()
            except (IndexError, ValueError):
                conn = Connection(SERVER)  # restore corrupted connection
            except Exception as e:
                # The original test was `find(...) != 1`, which is true for
                # nearly every message and so swallowed every exception;
                # only the "Unknown code" protocol error is recoverable.
                if "Unknown code" not in str(e):
                    raise
                conn = Connection(SERVER)  # restore corrupted connection
            else:
                words[letter][word] = detail
                print("Retreiving %4d of %d : %s" % (cnt, len(defs), word))
    return words


def analyseWords(words):
    """Classify dict-server definitions by word type.

    ``words`` is ``{letter: {word: definition text}}`` as returned by
    getRawWords().  Returns ``{letter: {type tag: [words]}}``; a word is
    listed under every type whose WORD_TYPE_REGEXES pattern is found in its
    definition, so it can appear under several types or under none.
    """
    print("Analysing...")
    wordMap = {}
    for letter, definitions in words.items():
        # initialize the map
        byType = {tag: [] for tag in WORD_TYPE_REGEXES}
        total = len(definitions)
        for cnt, (word, detail) in enumerate(definitions.items(), 1):
            print("Analysing %4d of %d : %s" % (cnt, total, word))
            for tag, regex in WORD_TYPE_REGEXES.items():
                if regex.search(detail):
                    byType[tag].append(word)
        wordMap[letter] = byType
    return wordMap


# get the words from a WordNet database
# unlike with the dict servers, the WN DBs are well organised and it is
# easy to know which word is a noun, a verb...
def getRawWordsWN(wn_dir=None):
    """Build the word map from the WordNet index files.

    ``wn_dir`` is the directory holding the files named in WN_TYPE_INDEXES;
    it defaults to WN_DIR.  The first whitespace-delimited token of each
    record is taken as the word; records whose first character (lowercased)
    is not in LETTERS, and words containing an underscore, are skipped.

    Returns ``{letter: {type tag: [words]}}``.
    """
    if wn_dir is None:
        wn_dir = WN_DIR

    # some init
    print("Matching...")
    words = {
        letter: {tag: [] for tag in WN_TYPE_INDEXES} for letter in LETTERS
    }

    # reverse order from what we do with a dictd
    for tag, indexName in WN_TYPE_INDEXES.items():
        with open(os.path.join(wn_dir, indexName)) as typeIndex:
            for record in typeIndex:
                letter = record[0].lower()
                if letter not in LETTERS:
                    continue
                # split() rather than slicing up to find(" "): the slice
                # dropped the last character of a record without a space
                word = record.split(None, 1)[0]
                # skip the words with underscores
                if "_" not in word:
                    words[letter][tag].append(word)
    return words


def printStats(wordMap):
    """Print, for each letter of ``wordMap``, the word count per type."""
    for letter, byType in wordMap.items():
        print("%s:" % letter)
        for tag, found in byType.items():
            print("%6s : %4d" % (tag, len(found)))


def genAnagram(wordMap):
    """Print random expansions of WORD for every pattern in ANAGRAM_TYPES.

    ``wordMap`` is ``{letter: {type tag: [words]}}``.  For each pattern, one
    word is drawn per letter of WORD from the list matching the letter and
    the type at that position; each new combination is printed capitalized
    on its own line.

    Raises ValueError when a needed (letter, type) list is empty.
    """
    rng = Random(time.time())
    banner = "#" * 65
    padding = "#\n" * 10

    for anaType in ANAGRAM_TYPES:
        print(banner)
        sys.stdout.write(padding)
        print("# Type : %s" % str(anaType))
        sys.stdout.write(padding)
        print(banner)

        # the candidate lists do not change between draws: look them up once
        pools = []
        for i, letter in enumerate(WORD):
            pool = wordMap[letter][anaType[i]]
            if not pool:
                raise ValueError(
                    "no '%s' word starting with '%s' in the word map"
                    % (anaType[i], letter))
            pools.append(pool)

        # we do not process all the possible possibilities,
        # when we generate a few already generated anagrams,
        # we have enough of them ; )
        # we do this to spare us the job of shuffling the resulting list
        # (and maybe some time too...)
        nbClash = 0
        nbAna = 0
        maxClash = 5
        anagrams = set()  # hashes are faster and we have a lot of records
        while nbClash < maxClash and nbAna < MAX_ACRONYMES:
            anagram = tuple(rng.choice(pool) for pool in pools)
            if anagram in anagrams:
                nbClash += 1
                continue
            anagrams.add(anagram)
            nbAna += 1
            print(" ".join(w[:1].upper() + w[1:].lower() for w in anagram))


def _saveWordMap(wordMap):
    """Pickle ``wordMap`` into WORDS_FILE."""
    # binary mode: required by pickle on Python 3, harmless on Python 2
    with open(WORDS_FILE, "wb") as out:
        Pickler(out).dump(wordMap)


def _loadWordMap():
    """Return the word map pickled in WORDS_FILE."""
    # NOTE: unpickling can execute arbitrary code; only load a WORDS_FILE
    # that was produced by this script.
    with open(WORDS_FILE, "rb") as src:
        return Unpickler(src).load()


def main(argv=None):
    """Run the command named by ``argv[1]`` (``argv`` defaults to sys.argv).

    Returns 0 on success, 1 after printing the usage message when the
    command is missing or unknown.
    """
    if argv is None:
        argv = sys.argv
    command = argv[1] if len(argv) > 1 else None

    if command == "update":
        _saveWordMap(analyseWords(getRawWords()))
    elif command == "updatewn":
        _saveWordMap(getRawWordsWN())
    elif command == "check":
        print("Counting...")
        printStats(_loadWordMap())
    elif command == "generate":
        genAnagram(_loadWordMap())
    else:
        print("USAGE : OBBAnagramer.py update|updatewn|check|generate")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

#
# The END !
#

# - --
# Yannick Gingras
# Coder for OBB : Organically Bimillenial Bob
# http://OpenBeatBox.org
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1.0.7 (GNU/Linux)
#
# iD8DBQE+afPDrhy5Fqn/MRARAoQyAJ9jNGvJkByG1L0xGe0qRJnjmARGOQCfRHJ/
# 73SqSARor672EufWrz/oqls=
# =W4Va
# -----END PGP SIGNATURE-----
# |