I want to parse Windows Resource File with pyparsing, because Menu can have deep nesting structure. It would be very difficult to parse such structure with regular expressions.
Everything works properly, but today I found that my code can only find one instance. To make me clear, here is the contents of *.rc file(E:\tool\res\my.rc, to save space only show the error prone part):
#include"../include/resource.h"IDR_MENU_OPTIONMENUBEGINPOPUP"Options"BEGINMENUITEM"List Layers for &All Pages",IDM_SHOW_ALLMENUITEM"List Layers for &Visible Pages",IDM_SHOW_VISIBLEMENUITEMSEPARATORMENUITEM"&Reset to Initial Visibility",IDM_RESET_INITMENUITEMSEPARATORMENUITEM"E&xpand All",IDM_EXPAND_ALLMENUITEM"C&ollapse All",IDM_COLLAPSE_ALLENDPOPUP""BEGINMENUITEM"List Layers for &All Pages",IDM_LIST_ALLMENUITEM"List Layers for &Visible Pages",IDM_LIST_VISIBLEMENUITEMSEPARATORMENUITEM"&Reset to Initial Visibility",IDM_RESET_INITMENUITEMSEPARATORMENUITEM"E&xpand All",IDM_EXPAND_ALLMENUITEM"C&ollapse All",IDM_COLLAPSE_ALLMENUITEMSEPARATORMENUITEM"Layer &Properties...",IDM_LAYER_PROPERTIESENDENDIDR_MENU_PRPPERTIESMENUBEGIN// the menu block is skiped by pyparsingPOPUP""BEGINMENUITEM"&Show Layers",IDM_SHOWMENUITEM"&Properties...",IDM_PROPERTIESENDMENUITEM"",65535END#endif // not APSTUDIO_INVOKED
my Python code can't find IDR_MENU_PRPPERTIES MENU, the output now is:
importreimportosimportcodecsimportfnmatchfrombs4importUnicodeDammitfromrc_commonimportget_name2idimportxml.etree.ElementTreeasETfrompyparsingimportrestOfLine,cStyleComment,Word,alphanums,alphas,\Optional,SkipTo,ZeroOrMore,Group,Keyword,quotedString,delimitedList,\nums,commaSeparatedList,Forward,CombineclassRcParser:def__init__(self,rc_file):self.rc_file=rc_filehandle=open(rc_file,'rb')binary_data=handle.read()handle.close()dammit=UnicodeDammit(binary_data)self.rc_src=dammit.unicode_markupself.encoding=dammit.original_encodingself.string_table_id=set()self.dialog_id=set()self.menu_id=set()self.img_id=set()self.parse(self.rc_src)defget_rc_header(self):inx=self.rc_file.rfind('\\')path=self.rc_file[:inx+1]file_lists=[path+fileforfileinos.listdir(path)iffile.lower().endswith('resource.h')]ifnotfile_lists:returnNonereturnfile_lists[0]defid_by_parsing_rc(self):rc_id=self.img_id|self.menu_id|self.dialog_id|self.string_table_idreturnrc_iddefrc_statement(self):""" Generate a RC statement parser that can be used to parse a RC file :rtype: pyparsing.ParserElement """one_line_comment='//'+restOfLinecomments=cStyleComment^one_line_commentprecompiler=Word('#',alphanums)+restOfLinelanguage_definition="LANGUAGE"+Word(alphas+'_').setResultsName("language")+Optional(','+Word(alphas+'_').setResultsName("sublanguage"))block_start=(Keyword('{')|Keyword("BEGIN")).setName("block_start")block_end=(Keyword('}')|Keyword("END")).setName("block_end")reserved_words=block_start|block_endname_id=~reserved_words+\Word(alphas,alphanums+'_').setName("name_id")numbers=Word(nums)integerconstant=numbers^Combine('0x'+numbers)constant=Combine(Optional(Keyword("NOT"))+(name_id|integerconstant),adjacent=False,joinString=' ')combined_constants=delimitedList(constant,'|')block_options=Optional(SkipTo(Keyword("CAPTION"),failOn=block_start)("pre_caption")+Keyword("CAPTION")+quotedString("caption"))+SkipTo(block_start)("post_caption")undefined_control=Group(name_id.setResultsName("id_control")+delimitedList(quotedString^constant^numbers^Group(combined_constants)).setResultsName("values_"))block=block_start+\ZeroOrMore(undefined_control)("controls")+block_enddialog=name_id("block_id")+(Keyword("DIALOGEX")|Keyword("DIALOG"))("block_type")+block_options+blockstring_table=Keyword("STRINGTABLE")("block_type")+block_options+blockmenu_item=Keyword("MENUITEM")("block_type")+(commaSeparatedList("values_")|Keyword("SEPARATOR"))popup_block=Forward()popup_block<<=Group(Keyword("POPUP")("block_type")+Optional(quotedString("caption"))+block_start+ZeroOrMore(Group(menu_item|popup_block))("elements")+block_end)("popups*")menu=name_id("block_id")+\Keyword("MENU")("block_type")+block_options+\block_start+ZeroOrMore(popup_block)+block_endstatem=comments^precompiler^language_definition^dialog^string_table^menureturnstatemdefgenerate_menu_pre_name(self,block_type,block_id):"""Return the pre-name generated for elements of a menu."""return"%s.%s"% (block_type, block_id)defgenerate_popup_pre_name(self,pre_name,caption):"""Return the pre-name generated for subelements of a popup. :param pre_name: The pre_name that already have the popup. :param caption: The caption (whitout quotes) of the popup. :return: The subelements pre-name based in the pre-name of the popup and its caption. """return"%s.%s"% (pre_name, caption.replace(" ", "_"))defadd_popup_units(self,pre_name,popup):"""Transverses the popup tree making new units as needed."""forelementinpopup.elements:ifelement.block_typeandelement.block_type=="MENUITEM":ifelement.values_andlen(element.values_)>=2:var=element.values_[1]ifnotvar.isdigit():self.menu_id.add(var)#Elseitcanbeaseparator.elifelement.popups:forsub_popupinelement.popups:self.add_popup_units(self.generate_popup_pre_name(pre_name,popup.caption[1:-1]),sub_popup)defparse(self,rcsrc):"""Read the source of a .rc file in and include them as units."""#Parsethestringsintoastructure.results=self.rc_statement().searchString(rcsrc)forstatementinresults:ifnotstatement.block_type:continueifstatement.block_typein("DIALOG","DIALOGEX"):helper=statement.block_id[0]self.dialog_id.add(statement.block_id[0])control_type=["AUTOCHECKBOX","AUTORADIOBUTTON","CAPTION","CHECKBOX","CTEXT","CONTROL","DEFPUSHBUTTON","GROUPBOX","LTEXT","PUSHBUTTON","RADIOBUTTON","RTEXT","COMBOBOX"]forcontrolinstatement.controls:fk=(control.id_control[0]incontrol_type)flag=(control.values_[0].startswith('"')orcontrol.values_[0].startswith("'"))ifcontrol.id_control[0]incontrol_type:ifflag:self.dialog_id.add(control.values_[1])else:self.dialog_id.add(control.values_[0])continueifstatement.block_typein("MENU"):pre_name=self.generate_menu_pre_name(statement.block_type,statement.block_id[0])self.menu_id.add(statement.block_id[0])forpopupinstatement.popups:self.add_popup_units(pre_name,popup)continueifstatement.block_typein("STRINGTABLE"):fortextinstatement.controls:self.string_table_id.add(text.id_control[0])continuelines=rcsrc.splitlines()forlineinlines:line=line.rstrip()m=re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$',line)ifnotm:continueself.img_id.add(m.group(1))defmain():x=RcParser(r'E:\tool\res\my.rc')print('\n'.join(sorted(x.id_by_parsing_rc())))if__name__=="__main__":main()
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
I want to parse Windows Resource File with
pyparsing
, becauseMenu
can have deep nesting structure. It would be very difficult to parse such structure with regular expressions.Everything works properly, but today I found that my code can only find one instance. To make me clear, here is the contents of *.rc file(E:\tool\res\my.rc, to save space only show the error prone part):
my Python code can't find IDR_MENU_PRPPERTIES MENU, the output now is:
but the expected output should be:
and here is my code: