class single_state:
    """One FSG transition: an arc from one state to another that emits a word.

    Printing an instance yields the ``T <from> <to> <prob> <word>`` line
    used in the transition section of the grammar.
    """

    # Class-level defaults; __init__ overrides all of them per instance.
    begin = 0
    end = 1
    prob = 0
    word = ''

    def __init__(self, bs, es, pb, w):
        """Store the transition.

        bs -- number of the state the transition leaves
        es -- number of the state the transition enters
        pb -- probability; formatted with ``%4s``, so a number or a
              pre-formatted string both work
        w  -- word emitted on the transition
        """
        self.begin = bs
        self.end = es
        self.prob = pb
        self.word = w

    def b_state(self):
        """Return the source state number."""
        return self.begin

    def e_state(self):
        """Return the destination state number."""
        return self.end

    def probility(self):
        """Return the transition probability as it was passed in."""
        return self.prob

    def s_word(self):
        """Return the word emitted on the transition."""
        return self.word

    def __repr__(self):
        """Return the transition formatted as an FSG ``T`` line."""
        return 'T %4d %4d %4s %s' % (self.begin, self.end, self.prob, self.word)
def g_split(sen):
    """Split a bracketed sentence into a nested token list.

    Tokens are separated by whitespace; ``[`` and ``]`` delimit a group and
    may be nested.  Top-level tokens keep their input order, while the
    members of every bracketed group come out in reverse order, e.g.
    ``'[nice glade] to meet [you her]'`` becomes
    ``[['glade', 'nice'], 'to', 'meet', ['her', 'you']]``.

    Raises ValueError when the brackets are not balanced.
    """
    tokens = sen.replace('[', ' [ ').replace(']', ' ] ').split()
    # stack[0] collects the top level; every '[' pushes a fresh list for
    # the group currently being read.
    stack = [[]]
    for tok in tokens:
        if tok == '[':
            stack.append([])
        elif tok == ']':
            if len(stack) == 1:
                raise ValueError("unbalanced ']' in %r" % (sen,))
            group = stack.pop()
            # Group members are kept newest-first, the same order the
            # original front-inserting implementation produced.
            group.reverse()
            stack[-1].append(group)
        else:
            stack[-1].append(tok)
    if len(stack) != 1:
        raise ValueError("unbalanced '[' in %r" % (sen,))
    return stack[0]
class flat_list:
    """Flatten a nested list, e.g. [2,3,[4,5,[6,7],8],9] -> [2,3,4,5,6,7,8,9].

    The flattened items are collected at construction time and returned
    by ``value()``.
    """

    def __init__(self, org_list):
        """Flatten ``org_list`` (lists nested to any depth) into ``ret_list``."""
        self.ret_list = []
        self.__deal_list(org_list)

    def __deal_list(self, var):
        """Append the items of ``var`` depth-first, descending into sub-lists."""
        for item in var:
            # isinstance() replaces the Python-2-only types.ListType check.
            if isinstance(item, list):
                self.__deal_list(item)
            else:
                self.ret_list.append(item)

    def value(self):
        """Return the flattened list."""
        return self.ret_list
class ShpinxGrammarParser:
    """Build a Sphinx finite state grammar (FSG) from bracketed sentences.

    Each input string is one sentence; the words inside ``[...]`` are
    alternatives.  State 0 is the start state, state 1 the final state and
    intermediate states are numbered from 2 upwards.  ``show()`` prints
    the resulting grammar.
    """

    b_tag = 'FSG_BEGIN'
    e_tag = 'FSG_END'

    def __init__(self, sent):
        """Parse ``sent``, a list of sentence strings, into transitions."""
        self.start_s = 0
        self.final_s = 1
        self.count_s = 0      # total number of states, set by __parse
        self.sentenses = []   # one nested token list per sentence
        self.t_tags = []      # single_state transitions in output order
        self.__format_sent(sent)
        self.__parse()

    def __format_sent(self, sent):
        """Tokenise every sentence string with g_split()."""
        for text in sent:
            self.sentenses.append(g_split(text))

    def __parse(self):
        """Fill ``self.t_tags`` and ``self.count_s`` from ``self.sentenses``.

        Probabilities: every transition leaving state 0 gets 1/N, where N
        is the number of first-position alternatives over all sentences;
        a plain word in the middle gets 1; a group in the middle gets
        1/len(group); every transition into the final state gets 1.

        A sentence made of a single item only gets its transition(s) out
        of state 0 and never reaches the final state; that behaviour is
        kept from the original script.
        """
        def rate(count):
            return '%.2f' % (1.0 / count)

        first_branch = 0
        for sentence in self.sentenses:
            if not sentence:
                continue  # an empty sentence contributes nothing
            head = sentence[0]
            if isinstance(head, str):
                first_branch += 1
            else:
                first_branch += len(flat_list(head).value())
        # Computed lazily so an empty sentence list does not divide by zero.
        first_rate = rate(first_branch) if first_branch else None

        c_state = 2  # next free state number; 0 and 1 are start and final
        for sentence in self.sentenses:
            p_state = 0   # state reached by the most recent plain word
            p_list = []   # states reached by the most recent group
            last = len(sentence) - 1
            for j, item in enumerate(sentence):
                is_word = isinstance(item, str)
                words = [item] if is_word else flat_list(item).value()
                # Predecessors are the previous group's states when there
                # are any, otherwise the previous single state.
                preds = p_list if p_list else [p_state]

                if j != 0 and j == last:
                    # The last item always leads into the final state.
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(single_state(pred, 1, 1, word))
                    continue

                if j == 0:
                    prob = first_rate
                elif is_word:
                    prob = 1
                else:
                    prob = rate(len(words)) if words else None

                if is_word:
                    # All predecessors converge on one new state.
                    for pred in preds:
                        self.t_tags.append(
                            single_state(pred, c_state, prob, item))
                    p_state = c_state
                    p_list = []
                    c_state += 1
                else:
                    # Every alternative gets a state of its own.
                    new_states = []
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(
                                single_state(pred, c_state, prob, word))
                        new_states.append(c_state)
                        c_state += 1
                    p_list = new_states
        self.count_s = c_state

    def show(self):
        """Print the grammar in FSG text format to standard output."""
        output_list = [self.b_tag, 'N\t%d' % self.count_s, 'S\t0', 'F\t1']
        output_list.extend('%s' % trans for trans in self.t_tags)
        output_list.append(self.e_tag)
        # Parenthesised single argument works as statement and as function.
        print('\n'.join(output_list))
# Demo: build the grammar for two sample sentences and print it as FSG.
a = ShpinxGrammarParser([
    '[this that][is are][ one two three]',
    '[nice glade] to meet [you her]',
])
a.show()
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Hi,
I have written a Python script to generate an FSG file. Maybe it is not smart enough, and maybe there are many things I should know to make the FSG better, but I do not have any further detailed documentation. At least it works.
here is the source code:
#!/usr/bin/env python
import re
import types
import sys
class single_state:
    """One FSG transition: an arc from one state to another that emits a word.

    Printing an instance yields the ``T <from> <to> <prob> <word>`` line
    used in the transition section of the grammar.
    """

    # Class-level defaults; __init__ overrides all of them per instance.
    begin = 0
    end = 1
    prob = 0
    word = ''

    def __init__(self, bs, es, pb, w):
        """Store the transition.

        bs -- number of the state the transition leaves
        es -- number of the state the transition enters
        pb -- probability; formatted with ``%4s``, so a number or a
              pre-formatted string both work
        w  -- word emitted on the transition
        """
        self.begin = bs
        self.end = es
        self.prob = pb
        self.word = w

    def b_state(self):
        """Return the source state number."""
        return self.begin

    def e_state(self):
        """Return the destination state number."""
        return self.end

    def probility(self):
        """Return the transition probability as it was passed in."""
        return self.prob

    def s_word(self):
        """Return the word emitted on the transition."""
        return self.word

    def __repr__(self):
        """Return the transition formatted as an FSG ``T`` line."""
        return 'T %4d %4d %4s %s' % (self.begin, self.end, self.prob, self.word)
def g_split(sen):
    """Split a bracketed sentence into a nested token list.

    Tokens are separated by whitespace; ``[`` and ``]`` delimit a group and
    may be nested.  Top-level tokens keep their input order, while the
    members of every bracketed group come out in reverse order, e.g.
    ``'[nice glade] to meet [you her]'`` becomes
    ``[['glade', 'nice'], 'to', 'meet', ['her', 'you']]``.

    Raises ValueError when the brackets are not balanced.
    """
    tokens = sen.replace('[', ' [ ').replace(']', ' ] ').split()
    # stack[0] collects the top level; every '[' pushes a fresh list for
    # the group currently being read.
    stack = [[]]
    for tok in tokens:
        if tok == '[':
            stack.append([])
        elif tok == ']':
            if len(stack) == 1:
                raise ValueError("unbalanced ']' in %r" % (sen,))
            group = stack.pop()
            # Group members are kept newest-first, the same order the
            # original front-inserting implementation produced.
            group.reverse()
            stack[-1].append(group)
        else:
            stack[-1].append(tok)
    if len(stack) != 1:
        raise ValueError("unbalanced '[' in %r" % (sen,))
    return stack[0]
class flat_list:
    """Flatten a nested list, e.g. [2,3,[4,5,[6,7],8],9] -> [2,3,4,5,6,7,8,9].

    The flattened items are collected at construction time and returned
    by ``value()``.
    """

    def __init__(self, org_list):
        """Flatten ``org_list`` (lists nested to any depth) into ``ret_list``."""
        self.ret_list = []
        self.__deal_list(org_list)

    def __deal_list(self, var):
        """Append the items of ``var`` depth-first, descending into sub-lists."""
        for item in var:
            # isinstance() replaces the Python-2-only types.ListType check.
            if isinstance(item, list):
                self.__deal_list(item)
            else:
                self.ret_list.append(item)

    def value(self):
        """Return the flattened list."""
        return self.ret_list
class ShpinxGrammarParser:
    """Build a Sphinx finite state grammar (FSG) from bracketed sentences.

    Each input string is one sentence; the words inside ``[...]`` are
    alternatives.  State 0 is the start state, state 1 the final state and
    intermediate states are numbered from 2 upwards.  ``show()`` prints
    the resulting grammar.
    """

    b_tag = 'FSG_BEGIN'
    e_tag = 'FSG_END'

    def __init__(self, sent):
        """Parse ``sent``, a list of sentence strings, into transitions."""
        self.start_s = 0
        self.final_s = 1
        self.count_s = 0      # total number of states, set by __parse
        self.sentenses = []   # one nested token list per sentence
        self.t_tags = []      # single_state transitions in output order
        self.__format_sent(sent)
        self.__parse()

    def __format_sent(self, sent):
        """Tokenise every sentence string with g_split()."""
        for text in sent:
            self.sentenses.append(g_split(text))

    def __parse(self):
        """Fill ``self.t_tags`` and ``self.count_s`` from ``self.sentenses``.

        Probabilities: every transition leaving state 0 gets 1/N, where N
        is the number of first-position alternatives over all sentences;
        a plain word in the middle gets 1; a group in the middle gets
        1/len(group); every transition into the final state gets 1.

        A sentence made of a single item only gets its transition(s) out
        of state 0 and never reaches the final state; that behaviour is
        kept from the original script.
        """
        def rate(count):
            return '%.2f' % (1.0 / count)

        first_branch = 0
        for sentence in self.sentenses:
            if not sentence:
                continue  # an empty sentence contributes nothing
            head = sentence[0]
            if isinstance(head, str):
                first_branch += 1
            else:
                first_branch += len(flat_list(head).value())
        # Computed lazily so an empty sentence list does not divide by zero.
        first_rate = rate(first_branch) if first_branch else None

        c_state = 2  # next free state number; 0 and 1 are start and final
        for sentence in self.sentenses:
            p_state = 0   # state reached by the most recent plain word
            p_list = []   # states reached by the most recent group
            last = len(sentence) - 1
            for j, item in enumerate(sentence):
                is_word = isinstance(item, str)
                words = [item] if is_word else flat_list(item).value()
                # Predecessors are the previous group's states when there
                # are any, otherwise the previous single state.
                preds = p_list if p_list else [p_state]

                if j != 0 and j == last:
                    # The last item always leads into the final state.
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(single_state(pred, 1, 1, word))
                    continue

                if j == 0:
                    prob = first_rate
                elif is_word:
                    prob = 1
                else:
                    prob = rate(len(words)) if words else None

                if is_word:
                    # All predecessors converge on one new state.
                    for pred in preds:
                        self.t_tags.append(
                            single_state(pred, c_state, prob, item))
                    p_state = c_state
                    p_list = []
                    c_state += 1
                else:
                    # Every alternative gets a state of its own.
                    new_states = []
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(
                                single_state(pred, c_state, prob, word))
                        new_states.append(c_state)
                        c_state += 1
                    p_list = new_states
        self.count_s = c_state

    def show(self):
        """Print the grammar in FSG text format to standard output."""
        output_list = [self.b_tag, 'N\t%d' % self.count_s, 'S\t0', 'F\t1']
        output_list.extend('%s' % trans for trans in self.t_tags)
        output_list.append(self.e_tag)
        # Parenthesised single argument works as statement and as function.
        print('\n'.join(output_list))
# Demo: build the grammar for two sample sentences and print it as FSG.
a = ShpinxGrammarParser([
    '[this that][is are][ one two three]',
    '[nice glade] to meet [you her]',
])
a.show()
As you can see, it generates an FSG from a list. Each element of the list is a string that represents one sentence. It supports "[]", which means you can pick any one of the words inside the brackets.
as the example:
Two sentences:
(Please forget about the English grammar :) — this is just an example.)
[this that][is are][ one two three]
[nice glade] to meet [you her]
These could produce the sentences:
this is one
this is two
this is three
that is one
that is two
that is three
this are one
this are two
this are three
nice to meet you
nice to meet her
glade to meet you
glade to meet her
the output FSG would be:
FSG_BEGIN
N 10
S 0
F 1
T 0 2 0.25 that
T 0 3 0.25 this
T 2 4 0.50 are
T 3 4 0.50 are
T 2 5 0.50 is
T 3 5 0.50 is
T 4 1 1 three
T 5 1 1 three
T 4 1 1 two
T 5 1 1 two
T 4 1 1 one
T 5 1 1 one
T 0 6 0.25 glade
T 0 7 0.25 nice
T 6 8 1 to
T 7 8 1 to
T 8 9 1 meet
T 9 1 1 her
T 9 1 1 you
FSG_END
Any suggestions for this?
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Can anyone tell me exactly what they do and how to create one?
I cant seem to find a tutorial or much information on them.
Thanks!
What's FST? I have only heard about FSG.
http://cmusphinx.sourceforge.net/sphinx2/doc/sphinx2.html#sec_fsgfmt
Just a few days ago JFSG support appeared in sphinx though, see:
https://sourceforge.net/mailarchive/forum.php?thread_name=46DDA49B.10304%40cs.cmu.edu&forum_name=cmusphinx-sdmeet
I put a copy here:
http://groups.google.com/group/comp.speech.research/browse_thread/thread/adfb783f4e1b8aaa/0a3b630a97c4fe4a?hl=en#0a3b630a97c4fe4a
But it still has indentation errors. What can I do?
try again:
#!/usr/bin/env python
import re
import types
import sys
class single_state:
    """One FSG transition: an arc from one state to another that emits a word.

    Printing an instance yields the ``T <from> <to> <prob> <word>`` line
    used in the transition section of the grammar.
    """

    # Class-level defaults; __init__ overrides all of them per instance.
    begin = 0
    end = 1
    prob = 0
    word = ''

    def __init__(self, bs, es, pb, w):
        """Store the transition.

        bs -- number of the state the transition leaves
        es -- number of the state the transition enters
        pb -- probability; formatted with ``%4s``, so a number or a
              pre-formatted string both work
        w  -- word emitted on the transition
        """
        self.begin = bs
        self.end = es
        self.prob = pb
        self.word = w

    def b_state(self):
        """Return the source state number."""
        return self.begin

    def e_state(self):
        """Return the destination state number."""
        return self.end

    def probility(self):
        """Return the transition probability as it was passed in."""
        return self.prob

    def s_word(self):
        """Return the word emitted on the transition."""
        return self.word

    def __repr__(self):
        """Return the transition formatted as an FSG ``T`` line."""
        return 'T %4d %4d %4s %s' % (self.begin, self.end, self.prob, self.word)
def g_split(sen):
    """Split a bracketed sentence into a nested token list.

    Tokens are separated by whitespace; ``[`` and ``]`` delimit a group and
    may be nested.  Top-level tokens keep their input order, while the
    members of every bracketed group come out in reverse order, e.g.
    ``'[nice glade] to meet [you her]'`` becomes
    ``[['glade', 'nice'], 'to', 'meet', ['her', 'you']]``.

    Raises ValueError when the brackets are not balanced.
    """
    tokens = sen.replace('[', ' [ ').replace(']', ' ] ').split()
    # stack[0] collects the top level; every '[' pushes a fresh list for
    # the group currently being read.
    stack = [[]]
    for tok in tokens:
        if tok == '[':
            stack.append([])
        elif tok == ']':
            if len(stack) == 1:
                raise ValueError("unbalanced ']' in %r" % (sen,))
            group = stack.pop()
            # Group members are kept newest-first, the same order the
            # original front-inserting implementation produced.
            group.reverse()
            stack[-1].append(group)
        else:
            stack[-1].append(tok)
    if len(stack) != 1:
        raise ValueError("unbalanced '[' in %r" % (sen,))
    return stack[0]
class flat_list:
    """Flatten a nested list, e.g. [2,3,[4,5,[6,7],8],9] -> [2,3,4,5,6,7,8,9].

    The flattened items are collected at construction time and returned
    by ``value()``.
    """

    def __init__(self, org_list):
        """Flatten ``org_list`` (lists nested to any depth) into ``ret_list``."""
        self.ret_list = []
        self.__deal_list(org_list)

    def __deal_list(self, var):
        """Append the items of ``var`` depth-first, descending into sub-lists."""
        for item in var:
            # isinstance() replaces the Python-2-only types.ListType check.
            if isinstance(item, list):
                self.__deal_list(item)
            else:
                self.ret_list.append(item)

    def value(self):
        """Return the flattened list."""
        return self.ret_list
class ShpinxGrammarParser:
    """Build a Sphinx finite state grammar (FSG) from bracketed sentences.

    Each input string is one sentence; the words inside ``[...]`` are
    alternatives.  State 0 is the start state, state 1 the final state and
    intermediate states are numbered from 2 upwards.  ``show()`` prints
    the resulting grammar.
    """

    b_tag = 'FSG_BEGIN'
    e_tag = 'FSG_END'

    def __init__(self, sent):
        """Parse ``sent``, a list of sentence strings, into transitions."""
        self.start_s = 0
        self.final_s = 1
        self.count_s = 0      # total number of states, set by __parse
        self.sentenses = []   # one nested token list per sentence
        self.t_tags = []      # single_state transitions in output order
        self.__format_sent(sent)
        self.__parse()

    def __format_sent(self, sent):
        """Tokenise every sentence string with g_split()."""
        for text in sent:
            self.sentenses.append(g_split(text))

    def __parse(self):
        """Fill ``self.t_tags`` and ``self.count_s`` from ``self.sentenses``.

        Probabilities: every transition leaving state 0 gets 1/N, where N
        is the number of first-position alternatives over all sentences;
        a plain word in the middle gets 1; a group in the middle gets
        1/len(group); every transition into the final state gets 1.

        A sentence made of a single item only gets its transition(s) out
        of state 0 and never reaches the final state; that behaviour is
        kept from the original script.
        """
        def rate(count):
            return '%.2f' % (1.0 / count)

        first_branch = 0
        for sentence in self.sentenses:
            if not sentence:
                continue  # an empty sentence contributes nothing
            head = sentence[0]
            if isinstance(head, str):
                first_branch += 1
            else:
                first_branch += len(flat_list(head).value())
        # Computed lazily so an empty sentence list does not divide by zero.
        first_rate = rate(first_branch) if first_branch else None

        c_state = 2  # next free state number; 0 and 1 are start and final
        for sentence in self.sentenses:
            p_state = 0   # state reached by the most recent plain word
            p_list = []   # states reached by the most recent group
            last = len(sentence) - 1
            for j, item in enumerate(sentence):
                is_word = isinstance(item, str)
                words = [item] if is_word else flat_list(item).value()
                # Predecessors are the previous group's states when there
                # are any, otherwise the previous single state.
                preds = p_list if p_list else [p_state]

                if j != 0 and j == last:
                    # The last item always leads into the final state.
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(single_state(pred, 1, 1, word))
                    continue

                if j == 0:
                    prob = first_rate
                elif is_word:
                    prob = 1
                else:
                    prob = rate(len(words)) if words else None

                if is_word:
                    # All predecessors converge on one new state.
                    for pred in preds:
                        self.t_tags.append(
                            single_state(pred, c_state, prob, item))
                    p_state = c_state
                    p_list = []
                    c_state += 1
                else:
                    # Every alternative gets a state of its own.
                    new_states = []
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(
                                single_state(pred, c_state, prob, word))
                        new_states.append(c_state)
                        c_state += 1
                    p_list = new_states
        self.count_s = c_state

    def show(self):
        """Print the grammar in FSG text format to standard output."""
        output_list = [self.b_tag, 'N\t%d' % self.count_s, 'S\t0', 'F\t1']
        output_list.extend('%s' % trans for trans in self.t_tags)
        output_list.append(self.e_tag)
        # Parenthesised single argument works as statement and as function.
        print('\n'.join(output_list))
# Demo: build the grammar for two sample sentences and print it as FSG.
a = ShpinxGrammarParser([
    '[this that] [is are] [ one two three]',
    '[nice glade] to meet [you her]',
])
a.show()
I surrender. I give up. I can't post it correctly no matter what I try.
Hi,
I have written a Python script to generate an FSG file. Maybe it is not smart enough, and maybe there are many things I should know to make the FSG better, but I do not have any further detailed documentation. At least it works.
here is the source code:
#!/usr/bin/env python
import re
import types
import sys
class single_state:
    """One FSG transition: an arc from one state to another that emits a word.

    Printing an instance yields the ``T <from> <to> <prob> <word>`` line
    used in the transition section of the grammar.
    """

    # Class-level defaults; __init__ overrides all of them per instance.
    begin = 0
    end = 1
    prob = 0
    word = ''

    def __init__(self, bs, es, pb, w):
        """Store the transition.

        bs -- number of the state the transition leaves
        es -- number of the state the transition enters
        pb -- probability; formatted with ``%4s``, so a number or a
              pre-formatted string both work
        w  -- word emitted on the transition
        """
        self.begin = bs
        self.end = es
        self.prob = pb
        self.word = w

    def b_state(self):
        """Return the source state number."""
        return self.begin

    def e_state(self):
        """Return the destination state number."""
        return self.end

    def probility(self):
        """Return the transition probability as it was passed in."""
        return self.prob

    def s_word(self):
        """Return the word emitted on the transition."""
        return self.word

    def __repr__(self):
        """Return the transition formatted as an FSG ``T`` line."""
        return 'T %4d %4d %4s %s' % (self.begin, self.end, self.prob, self.word)
def g_split(sen):
    """Split a bracketed sentence into a nested token list.

    Tokens are separated by whitespace; ``[`` and ``]`` delimit a group and
    may be nested.  Top-level tokens keep their input order, while the
    members of every bracketed group come out in reverse order, e.g.
    ``'[nice glade] to meet [you her]'`` becomes
    ``[['glade', 'nice'], 'to', 'meet', ['her', 'you']]``.

    Raises ValueError when the brackets are not balanced.
    """
    tokens = sen.replace('[', ' [ ').replace(']', ' ] ').split()
    # stack[0] collects the top level; every '[' pushes a fresh list for
    # the group currently being read.
    stack = [[]]
    for tok in tokens:
        if tok == '[':
            stack.append([])
        elif tok == ']':
            if len(stack) == 1:
                raise ValueError("unbalanced ']' in %r" % (sen,))
            group = stack.pop()
            # Group members are kept newest-first, the same order the
            # original front-inserting implementation produced.
            group.reverse()
            stack[-1].append(group)
        else:
            stack[-1].append(tok)
    if len(stack) != 1:
        raise ValueError("unbalanced '[' in %r" % (sen,))
    return stack[0]
class flat_list:
    """Flatten a nested list, e.g. [2,3,[4,5,[6,7],8],9] -> [2,3,4,5,6,7,8,9].

    The flattened items are collected at construction time and returned
    by ``value()``.
    """

    def __init__(self, org_list):
        """Flatten ``org_list`` (lists nested to any depth) into ``ret_list``."""
        self.ret_list = []
        self.__deal_list(org_list)

    def __deal_list(self, var):
        """Append the items of ``var`` depth-first, descending into sub-lists."""
        for item in var:
            # isinstance() replaces the Python-2-only types.ListType check.
            if isinstance(item, list):
                self.__deal_list(item)
            else:
                self.ret_list.append(item)

    def value(self):
        """Return the flattened list."""
        return self.ret_list
class ShpinxGrammarParser:
    """Build a Sphinx finite state grammar (FSG) from bracketed sentences.

    Each input string is one sentence; the words inside ``[...]`` are
    alternatives.  State 0 is the start state, state 1 the final state and
    intermediate states are numbered from 2 upwards.  ``show()`` prints
    the resulting grammar.
    """

    b_tag = 'FSG_BEGIN'
    e_tag = 'FSG_END'

    def __init__(self, sent):
        """Parse ``sent``, a list of sentence strings, into transitions."""
        self.start_s = 0
        self.final_s = 1
        self.count_s = 0      # total number of states, set by __parse
        self.sentenses = []   # one nested token list per sentence
        self.t_tags = []      # single_state transitions in output order
        self.__format_sent(sent)
        self.__parse()

    def __format_sent(self, sent):
        """Tokenise every sentence string with g_split()."""
        for text in sent:
            self.sentenses.append(g_split(text))

    def __parse(self):
        """Fill ``self.t_tags`` and ``self.count_s`` from ``self.sentenses``.

        Probabilities: every transition leaving state 0 gets 1/N, where N
        is the number of first-position alternatives over all sentences;
        a plain word in the middle gets 1; a group in the middle gets
        1/len(group); every transition into the final state gets 1.

        A sentence made of a single item only gets its transition(s) out
        of state 0 and never reaches the final state; that behaviour is
        kept from the original script.
        """
        def rate(count):
            return '%.2f' % (1.0 / count)

        first_branch = 0
        for sentence in self.sentenses:
            if not sentence:
                continue  # an empty sentence contributes nothing
            head = sentence[0]
            if isinstance(head, str):
                first_branch += 1
            else:
                first_branch += len(flat_list(head).value())
        # Computed lazily so an empty sentence list does not divide by zero.
        first_rate = rate(first_branch) if first_branch else None

        c_state = 2  # next free state number; 0 and 1 are start and final
        for sentence in self.sentenses:
            p_state = 0   # state reached by the most recent plain word
            p_list = []   # states reached by the most recent group
            last = len(sentence) - 1
            for j, item in enumerate(sentence):
                is_word = isinstance(item, str)
                words = [item] if is_word else flat_list(item).value()
                # Predecessors are the previous group's states when there
                # are any, otherwise the previous single state.
                preds = p_list if p_list else [p_state]

                if j != 0 and j == last:
                    # The last item always leads into the final state.
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(single_state(pred, 1, 1, word))
                    continue

                if j == 0:
                    prob = first_rate
                elif is_word:
                    prob = 1
                else:
                    prob = rate(len(words)) if words else None

                if is_word:
                    # All predecessors converge on one new state.
                    for pred in preds:
                        self.t_tags.append(
                            single_state(pred, c_state, prob, item))
                    p_state = c_state
                    p_list = []
                    c_state += 1
                else:
                    # Every alternative gets a state of its own.
                    new_states = []
                    for word in words:
                        for pred in preds:
                            self.t_tags.append(
                                single_state(pred, c_state, prob, word))
                        new_states.append(c_state)
                        c_state += 1
                    p_list = new_states
        self.count_s = c_state

    def show(self):
        """Print the grammar in FSG text format to standard output."""
        output_list = [self.b_tag, 'N\t%d' % self.count_s, 'S\t0', 'F\t1']
        output_list.extend('%s' % trans for trans in self.t_tags)
        output_list.append(self.e_tag)
        # Parenthesised single argument works as statement and as function.
        print('\n'.join(output_list))
# Demo: build the grammar for two sample sentences and print it as FSG.
a = ShpinxGrammarParser([
    '[this that] [is are] [ one two three]',
    '[nice glade] to meet [you her]',
])
a.show()
As you can see, it generates an FSG from a list. Each element of the list is a string that represents one sentence. It supports "[]", which means you can pick any one of the words inside the brackets.
as the example:
Two sentences:
(Please forget about the English grammar :) — this is just an example.)
[this that] [is are] [ one two three]
[nice glade] to meet [you her]
These could produce the sentences:
this is one
this is two
this is three
that is one
that is two
that is three
this are one
this are two
this are three
nice to meet you
nice to meet her
glade to meet you
glade to meet her
the output FSG would be:
FSG_BEGIN
N 10
S 0
F 1
T 0 2 0.25 that
T 0 3 0.25 this
T 2 4 0.50 are
T 3 4 0.50 are
T 2 5 0.50 is
T 3 5 0.50 is
T 4 1 1 three
T 5 1 1 three
T 4 1 1 two
T 5 1 1 two
T 4 1 1 one
T 5 1 1 one
T 0 6 0.25 glade
T 0 7 0.25 nice
T 6 8 1 to
T 7 8 1 to
T 8 9 1 meet
T 9 1 1 her
T 9 1 1 you
FSG_END
Any suggestions for this?
Oh, shit: after I posted the Python script, the indentation came out wrong. What can I do? If the indentation is wrong, you can't use this Python script any more.