[Pyparsing] How do I fix this?
Brought to you by:
ptmcg
From: Robin S. <rob...@gm...> - 2015-10-31 00:28:13
|
I have some text I am trying to parse, specifically the track data. I have a grammar defined and it works; however, I am having the following problem: if the first track found is an audio track, I can retrieve both the audio and video tracks. However, if the first track is a video track, when I try to retrieve the tracks, I get the video track twice and no audio track. Here is my sample data: =============================================================================== File: file:///Users/test/Desktop/Media/Playback_Video_Format_Files/ipad_2_frontface_portrait.mov 5,674,352 bytes File Type Info: MajorBrand 'qt ' MinorVersion 0x00000000 1 compatible brands: 'qt ' Movie Data ('mdat'): 1 atom found, 5,665,002 data bytes com.apple.coremedia.formatreader.quicktime-iso Creation Date (date): 2011-06-14T16:06:30-0700 Movie Timescale: 600 Duration: 7444/600 00:00:12.407 Preferred rate: 1.00 Preferred Volume: 1.00 Movie Matrix: 1.0 0.0 0.0 / 0.0 1.0 0.0 / 0.0 0.0 1.0 Movie is self-contained. Not fast-start. QT user data atom available. QT metadata atom available. 2 tracks present. Track ID 1 vide (Video) Enabled Self-contained Format vide/avc1 dimensions: video 640 x 480, presentation: 640 x 480 (pixelAspect+clean), cleanAperture: 640 x 480 @ 0,0 (originTopLeft) Media Timescale: 600 Duration: 7447/600 00:00:12.412 MinSampleDuration: 19/600 AdvanceDecodeDelta: 0/600 00:00:00.000 Num data bytes: 5555600 Est. data rate: 3.581 Mbps Nominal framerate: 29.972 fps 372 samples Included in auto selection. Language code <und> Dimensions: 640 x 480 CleanAperture: 640 x 480 ProductionAperture: 640 x 480 EncodedPixels: 640 x 480 Track Matrix: 0.0 1.0 0.0 / -1.0 0.0 0.0 / 480.0 0.0 1.0 1 edit: Media start 0/600 00:00:00.000 dur 7444/600 00:00:12.407 Track start 0/600 00:00:00.000 dur 7444/600 00:00:12.407 QT metadata atom available.
Track ID 2 soun (Audio) Enabled Self-contained Format soun/aac 44100 Hz aac FormatFlags: 0x00000000 Bytes/Pkt: 0 Frames/Pkt: 1024 Bytes/Frame: 0 Chan/Frame: 1 Bits/Chan: 0 Reserved: 0x00000000 ChannelLayout: Mono Media Timescale: 44100 Duration: 549888/44100 00:00:12.469 MinSampleDuration: 1024/44100 AdvanceDecodeDelta: 0/44100 00:00:00.000 Num data bytes: 99464 Est. data rate: 63.815 kbps Nominal framerate: 43.066 fps 537 samples Track volume: 1 Included in auto selection. Language code <und> Dimensions: 0 x 0 Track Matrix: 1.0 0.0 0.0 / 0.0 1.0 0.0 / 0.0 0.0 1.0 1 edit: Media start 0/44100 00:00:00.000 dur 7444/600 00:00:12.407 Track start 0/600 00:00:00.000 dur 7444/600 00:00:12.407 """ Here is a snippet of my grammar: # Start Track Info Block # Track Info self.crap = Suppress(Optional('[file]')) self.track_id = 'Track ID' + self.plain_number + self.word + '(' + self.word.setResultsName('type') + restOfLine # Track Format self.audio_track_format = 'Format' + Combine(self.word + '/' + self.word).setResultsName('track_format') + \ Combine(self.plain_number + self.word, adjacent=False).setResultsName('frequency') + \ self.word.setResultsName('codec') + restOfLine self.subtitle_track_format = 'Format' + Combine(self.word + '/' + self.word).setResultsName( 'track_format') + restOfLine self.vtrack_format = 'Format' + Combine(self.word + '/' + self.word).setResultsName('track_format') self.vtrack_dimensions = 'dimensions: video' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False).setResultsName('video') self.vtrack_presentation = Suppress(', ') + 'presentation:' + \ Combine(self.plain_number + 'x' + self.plain_number + '(' + self.word + '+' + self.word + ')' + Suppress(','), adjacent=False) \ .setResultsName('presentation') self.vtrack_cleanaperture = 'cleanAperture:' + Combine(self.plain_number + 'x' + self.plain_number + restOfLine, adjacent=False) \ .setResultsName('cleanAperture') self.video_track_format = self.vtrack_format + 
self.vtrack_dimensions + \ self.vtrack_presentation + self.vtrack_cleanaperture # Audio Channel Layout self.channel_layout = 'ChannelLayout:' + restOfLine.setResultsName('channel_layout') # Audio Track Volume self.audio_track_volume = 'Track volume:' + self.plain_number.setParseAction(lambda t: int(t[0])) \ .setResultsName('audio_volume') # Frame reordering self.frame = Optional(Combine('Frame') + restOfLine) # Included self.included = Combine('Included' + restOfLine) # Media Timescale self.timescale = Combine(self.word + 'Timescale:' + self.plain_number, adjacent=False) self.duration = Combine('Duration:' + self.div_num + self.time, adjacent=False) self.sample_duration = Combine('MinSampleDuration:' + self.div_num.setResultsName('sample_duration'), adjacent=False) self.decode_delta = Combine('AdvanceDecodeDelta:' + Combine(self.div_num + restOfLine) .setResultsName('decode_delta'), adjacent=False) self.media_timescale = self.timescale + self.duration + self.sample_duration + self.decode_delta # Track Data self.data_bytes = 'Num data bytes:' + self.plain_number.setResultsName('data_bytes') self.data_rate = 'Est. 
data rate:' + Combine(self.decimal_number + self.word, adjacent=False) \ .setResultsName('estimated_data_rate') self.frame_rate = 'Nominal framerate:' + Combine(self.decimal_number + self.word, adjacent=False) \ .setResultsName('fps') self.samples = self.plain_number.setResultsName('samples') self.track_data = self.data_bytes + self.data_rate + self.frame_rate + self.samples + restOfLine # Track Dimensions self.audio_dimensions = 'Dimensions:' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False) \ .setResultsName('Dimensions') self.audio_track_matrix = 'Track Matrix:' + restOfLine.setResultsName('track_matrix') self.audio_track_dimensions = self.audio_dimensions + self.audio_track_matrix self.subtitle_dimensions = 'Dimensions:' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False) \ .setResultsName('Dimensions') self.subtitle_track_matrix = 'Track Matrix:' + restOfLine.setResultsName('track_matrix') self.subtitle_track_dimensions = self.subtitle_dimensions + self.subtitle_track_matrix self.video_dimensions = 'Dimensions:' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False) \ .setResultsName('Dimensions') self.video_clean_aperture = 'CleanAperture: ' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False).setResultsName('CleanAperture') self.video_production_aperture = 'ProductionAperture:' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False).setResultsName( 'ProductionAperture') self.video_encoded_pixels = 'EncodedPixels:' + Combine(self.plain_number + 'x' + self.plain_number, adjacent=False).setResultsName('EncodedPixels') self.video_track_matrix = 'Track Matrix:' + restOfLine.setResultsName('TrackMatrix') self.video_track_dimensions = self.video_dimensions + self.video_clean_aperture + \ self.video_production_aperture + self.video_encoded_pixels + self.video_track_matrix # Edits self.num_edits = self.plain_number.setResultsName('number_of_edits').setParseAction(lambda t: 
int(t[0])) + \ Optional(Literal('edit:') | Literal('edits:')) self.media_start = 'Media start' + Optional(Group(self.div_num + self.time) | 'INVALID TIME') \ .setResultsName('MediaStart') self.media_duration = 'dur' + Group(self.div_num + self.time).setResultsName('MediaDuration') self.track_start = 'Track start' + Group(self.div_num + self.time).setResultsName('TrackStart') self.track_duration = 'dur' + Group(self.div_num + self.time).setResultsName('TrackDuration') self.media_edit = Group(self.media_start + self.media_duration + self.track_start + self.track_duration + restOfLine + LineEnd().suppress()) self.edits = self.num_edits + OneOrMore(self.media_edit).setResultsName('edits') # Define Track Info Block # Audio Block self.audio_track_info = Group(self.crap + self.track_id + self.audio_track_format + self.channel_layout + self.media_timescale + self.track_data + self.audio_track_volume + self.included + self.audio_track_dimensions + OneOrMore(self.edits) + Optional(self.qt_user_data)) # Subtitle Block self.subtitle_track_info = Group(self.crap + self.track_id + self.subtitle_track_format + \ self.media_timescale + self.track_data + self.included + \ self.subtitle_track_dimensions + OneOrMore(self.edits) + Optional(self.qt_user_data)) # Video Block self.video_track_info = Group(self.crap + self.track_id + self.video_track_format + self.media_timescale + self.track_data + self.frame + self.included + self.video_track_dimensions + OneOrMore(self.edits) + Optional(self.qt_user_data)) self.tracks = ZeroOrMore(self.audio_track_info | self.video_track_info | self.subtitle_track_info) \ .setResultsName('tracks') |