Work at SourceForge, help us to make it a better place! We have an immediate need for a Support Technician in our San Francisco or Denver office.

Close

Pylibpcap write to a file

Help
tom levier
2012-03-19
2013-05-09
  • tom levier
    tom levier
    2012-03-19

    I am doing research and trying to extract a particular TCP flow from a .PCAP file. I want to read the packets in from the pcap file and check the port number and if the port number matches the port number I am looking for I want to write it to  a new .pcap file.

    I am able to open the file and identify each packet's TCP port number that I want but once I have identified the packet has that port number I am having trouble writing that packet to a file.

    Here is my code:
    import pcap
    import dpkt, struct, socket, time
    from collections import defaultdict, OrderedDict

    #Global Variables

    #pcap file name
    pFileName = 'pcap1.pcap'
    streamFileName = 'stream.pcap'

    #debugging variables

    #prints out information as pcap is
    #parse. Be aware this will greatly
    #increase amount of time that the
    #it will take to complete the program.
    # 0 = OFF, 1 = ON
    debug = 0

    #only parse up to this many pkts
    # if -1, will parse entire file
    maxPktsToProc = -1

    #show number of packets parsed at
    #every n number of packets
    showProgressAt = 1000000

    #dictionary for unique source IPs  ##Look this up
    sIP_dict = defaultdict(lambda: 0)
    #dictionary for unique destination IPs
    dIP_dict = defaultdict(lambda: 0)

    #dictionary for unique source ports
    sPort_dict = defaultdict(lambda: 0)
    #dictionary for unique destination ports
    dPort_dict = defaultdict(lambda: 0)

    #dictionary for unique source MAC addresses
    sMAC_dict = defaultdict(lambda: 0)
    #dictionary for unique destination MAC addresses
    dMAC_dict = defaultdict(lambda: 0)

    #array to hold packet size distro info
    #buckets (bytes) = 0-100, 101-200, 201-300, 301-500, 401-500,
    # 501-600, 601-700, 701-800, 801-900, 901-1000, 1001-1100,
    # 1101-1200, 1201-1300, 1301-1400, 1401-1500
    sizeDistArr =

    #time program to execute python file
    #against pcap
    timeToProcessPCAP = 0
    #used to aid in the calc of timeToProcess
    previousTime = 0

    #total packets parsed in pcap file
    pktsCaptured = 0
    #total time of traced in pcap file
    totalTime = 0

    #hold timestamp of first packet
    traceBeginTime = 0

    #total number of Bytes include headers
    #of all packets parsed
    totalBytes = 0

    #list is a tuple of src and dest ip addresses
    ipList =

    #list is a tuple of TCP src and destination ports
    # and sequence numbers
    TCPList =

    #List is a tuple of ipList and tcpList
    #tcpFlowlist =

    #Number of packets where the data types
    #have not been implemented yet. Currently have
    #DLT_RAW, DLT_EN10MB
    FrameDataErr = 0
    #Number of frame packets where the data types
    #have not been implemented yet. Currently have
    #IP, ARP, IP6
    EtherDataTypeErr = 0
    #Number of IP packets where the data types
    #have not been implemented yet. Currently have
    #TCP, UDP, ICMP
    IPdataErr = 0
    #Number of error from dispacther function
    #in dpkt class
    totalDispatchErr = 0

    #Counters
    #total UDP packets processed
    totalUDPpkts = 0
    #total TCP packets processed
    totalTCPpkts = 0
    #total IP packets processed
    totalIPpkts = 0
    #total TCP packets with 443 as dest port
    totalSSLpkts = 0
    #total ICMP packets processed
    totalICMPpkts = 0

    #TCP port number to extract flow from
    portNo = 59952

    #packet processing counter
    i = 0

    #timer
    previousCounterTime = 0

    #function: main
    #primary engine for parsing pcap files

    def main():
        global totalTime, pktsCaptured, i, timeToProcessPCAP, EhterDataErr
        global totalDispatchErr, previousCounterTime

        #begin timing of application processing
        timeToProcessPCAP = time.clock()
        previousCounterTime = time.clock()
       
        #istantiating pcap object using python pcap library
        #wrapper (python libpcap0.6.2)
        p = pcap.pcapObject()

        #opens pcap file for reading in wrapper
        p.open_offline(pFileName)

    ##    filterPackets()

    ##    #extract TCP Flow information
    ##    getTCPFlow(portNo) 

        print "…………………..PCAP File Header Info…………………………\n"
        #print information from pcap header file
        print "pcap major version: " + str(p.major_version())
        print "pcap minor version: " + str(p.minor_version())
        #see pcap.py for all DLT names currently implemented
        print "data link (DL) type: " + str(dltName(p.datalink()))
        print "snap length: " + str(p.snapshot())
       
        #if frame type is EN10MB
        if p.datalink() == pcap.DLT_EN10MB:
            print "\nEthernet Packets Found….Processing"
            while(True):
                i += 1
                if (time.clock() - previousCounterTime) > 3:
                    print ".",
                    previousCounterTime = time.clock()
                #wrapper call, parses one ethernet packet at a time
                #then executes call back function.
                try:
                    if (p.dispatch(1,processEthPkts) != 1) or (i == maxPktsToProc):               
                        endOfProcessing()
                        break            
                except Exception as e:
                    totalDispatchErr += 1
                    if debug == 1: print e
                    continue
                if (i%showProgressAt) == 0:
                    print str(i) + " packets processed so far."
        #if frame type is RAW
        elif p.datalink() == pcap.DLT_RAW:
            print "\nRAW Packets….Processing"
            while(True):
                i += 1
                if (time.clock() - previousCounterTime) > 3:
                    print ".",
                    previousCounterTime = time.clock()           
                #wrapper call, parses one raw IP packet at a time
                #then executes call back function.               
                try:
                    if (p.dispatch(1,processIPPkts) != 1 or (i == maxPktsToProc)) :
                        endOfProcessing()
                        break
                except Exception as e:
                    totalDispatchErr += 1               
                    if debug == 1: print e
                    continue
                if (i%showProgressAt) == 0:
                    print str(i) + " packets processed so far."

        elif p.datalink() == pcap.DLT_IEEE802_11:
            print "\n processing wireless packets "
        else:
            #there are over 16 other types in the wrapper not implemented for this lab
            #see pcap.py for other types.:
            print "Frame Data Not Implemented, cannot continue."
            FrameDataErr += 1

        print ipList
        print TCPList

    # takes in a port number and ip data
    # and isolates packets from the same flow
    ##def getTCPFlow(portNo, ipData):
    ##    pktCount = 0 #tracks number of packets in flow
    ##
    ##    flowno = dpkt.TCP
    ##
    ##def ethaddr(addr):
    ##    addrcode =
    ##    print addr
    ##    return "".join(addrcode)
    ##
    ##def ipv4addr(addr):
    ##    addrcode =
    ##    return "".join(addrcode)
    ##
    ##def filterPackets():
    ##    fileName = "pcap1.pcap"
    ##    target_sport = 59952
    ##    pcr = dpkt.pcap.Reader(open(fineName))
    ##    ip = dpkt.ip.IP(src=ipv4addr('172.20.196.33'), dst=ipv4addr('172.20.192.1'), p=4)
    ##    eth = dpkt.ethernet.Ethernet(type=dpkt.ethernet.ETH_TYPE_IP)
    ##
    ##    pcw = dpkt.pcap.Writer(open('gre_'+fileName,'wb')

    #pointed to from dispatch
    #dispatch passes packet length, binary data, and time stamp to
    #function.
           
    def processIPPkts(pktlen, data, timestamp):
        global totalTime, previousTime, pktsCaptured, totalBytes, i
        global traceBeginTime

        #pcapw = dpkt.pcap.Writer(open('stream1.pcap', 'wb'))
    ##    pcapw = open('stream1.pcap', 'wb')

        #print start time of trace
        if i == 1:
            traceBeginTime = timestamp
       
        #increment total number of bytes global variable
        totalBytes += pktlen

        #fill array for distribution analysis
        sizeDistro(pktlen)

        #running calculation of time of capture
        if pktsCaptured != 0:
            totalTime += (timestamp - previousTime)
        previousTime = timestamp

        #increment packets captured
        pktsCaptured += 1

        #process IP packet data function call
        #pass raw IP data from frame packet
        processIPdata(dpkt.ip.IP(data))

    #pointed to from dispatch
    #dispatch passes packet length, binary data, and time stamp to
    #function.
       
    def processEthPkts(pktlen, data, timestamp):
        global totalTime, previousTime, pktsCaptured, totalBytes
        global EtherDataTypeErr, i, traceBeginTime

        #open file for writing
        pcapw = dpkt.pcap.Writer(open('stream1.pcap', 'wb'))

        #print start time of trace
        if i == 1:
            traceBeginTime = timestamp
                       
        #increment total number of bytes global variable
        totalBytes += pktlen

        #fill array for distribution analysis
        sizeDistro(pktlen)

        #running calculation of time of capture
        if pktsCaptured != 0:
            totalTime += (timestamp - previousTime)
        previousTime = timestamp

        #increment pkts captured
        pktsCaptured += 1
      
        #extra raw ethernet data from frame packet
        ##    eth = dpkt.ethernet.Ethernet(data)
        eth = dpkt.ethernet.Ethernet(type=dpkt.ethernet.ETH_TYPE_IP)
        eth1 = dpkt.ethernet.Ethernet(data)
       
        #find src and dst mac address
        src_mac = decode_mac(eth1.src)
        dst_mac = decode_mac(eth1.dst)

        #print "mac: " + src_mac

        #add MACs to hash table
        sMAC_dict += 1
        dMAC_dict += 1

        tcp = dpkt.tcp.TCP
        ip_src = socket.inet_ntoa(eth1.data.src)
        #print ip_src
        ip_dst = socket.inet_ntoa(eth1.data.dst)
        ip = dpkt.ip.IP(src=ip_src, dst=ip_dst, p = 6)
        eth.src = src_mac
        eth.dst = dst_mac
        #print eth1.data.data.sport
       

        #print simple packet information for debugging
        if debug == 1:
            print "src MAC: " + src_mac
            print "dst MAC: " + dst_mac

       
           
        if eth1.data.data.sport == 59952:
    ##        eth_in = dpkt.ethernet.ETHERNET(data)
    ##        tcp.data = eth_in.data
            tcp.data = eth1.data
            print tcp.sport
            ip.sum = 0
            ip.data = tcp
            ip.len = pktlen
            eth.data = ip
            print socket.inet_ntoa(eth1.data.src)
            pcapw.writepkt(eth1, timestamp) #write eth and timestamp
            print "printed packet"
            pcapw.close()
           
           

               
        #if data is IP packets
        if eth.type == dpkt.ethernet.ETH_TYPE_IP:
            processIPdata(eth.data)
        #if data is ARP
        elif eth.type == dpkt.ethernet.ETH_TYPE_ARP:
            processARPdata(eth.data)
        #if data is IP6 packets
        elif eth.type == dpkt.ethernet.ETH_TYPE_IP6:
            #print "using IPV6 packets\n"  #just for debugging
            processIP6data(eth.data)
        #if data is anyother type of packet
        else:
            EtherDataTypeErr += 1
            if debug == 1:
                print "\n***********Missed a Ether Data Type (" + hex(eth.type) + ") at at packet " + str(i)

     
  • Wim Lewis
    Wim Lewis
    2012-03-26

    Libpcap can write arbitrary packets to a file but IIRC py-libpcap doesn't expose that functionality. It probably wouldn't be hard to add-- I could give you some pointers if you want to go that route.

    However if the filter expression is simple enough you could just use "tcpdump -r infile -w outfile" to produce a filtered capture file.

    Alternately you could write the pcap output file "by hand". The format is quite simple, see http://wiki.wireshark.org/Development/LibpcapFileFormat for a description.

     
  • tom levier
    tom levier
    2012-03-26

    Is there something like pypcap or scapy that would be better for writing the new file?  I have tried dpkt.pcap.writer but was creating malformed packets.  It wrote the Ethernet and IP data but would not write any of the TCP data and I had trouble parsing the TCP source port

    Here is the code where I attempted that.
    ##  Authors:  Tom LeVier and Todd Sehl
    ##  Date:  23 March 2012
    ##  File Name: Flow_manipulator.py
    ##  Class:  Networks II (CS4550)
    ##  Professor Xie
    ##
    ##  Purpose:  The purpose of this program is to
    ##  read in a .pcap file and seperate a TCP flow
    ##  using a target port number.  Any packets with
    ##  port numbers that match the target port number
    ##  are written to a new file
    ##
    ##  Uses Python 2.7 on ubuntu 11.10
    ##
    ##  Input: pcap file designated in main() by filename
    ##
    ##  Output: pcap file 'tcp_' + filename

    import sys
    import dpkt
    import pcap
    import socket

    ##  This method splits up an ethernet address and convets from hex
    ##  to chars so it is readable
    def ethaddr(addr):
        addrcode =
       # print addr
        return "".join(addrcode)

    ##  This splits up an IP address and converts it from in to char
    ##  so it is readable
    def ipv4addr(addr):
        addrcode =
        return "".join(addrcode)

    def main():
        #input filename
        filename = "pcap1.pcap"
        tgt_port = 59952

        #open capture file to read
        pcr = dpkt.pcap.Reader(open(filename))

        #var to access TCP data
        tcp = dpkt.tcp.TCP

        #specify IP src and dst address for packets being written
        ip = dpkt.ip.IP(src=ipv4addr('172.20.196.33'), dst=ipv4addr('172.20.192.1'), p=6)

        # vars to track ethernet and ip data
        ip = dpkt.ip.IP
        eth = dpkt.ethernet.Ethernet(type=dpkt.ethernet.ETH_TYPE_IP)
       
        #specify ethernet src and dst addresses for writing packets   
        eth.src=ethaddr('68:a3:c4:71:54:39')
        eth.dst=ethaddr('00:90:0b:0a:00:3c')
       
        #set TCP data for new packets
        #this is not working yet
        tcp.sport = 59952
        tcp.dport = 443
        tcp.seq = 356
       
        #open a file to write data to
        pcw = dpkt.pcap.Writer(open('tcp_'+filename,'wb'))
       
    ##  read in packet data using timestamp and buffer 
        for ts, buf in pcr:
            #read ethernet data from the buffer
            eth_in = dpkt.ethernet.Ethernet(buf)

            #get tcp data
            tcp.data = eth_in.data

            #specify IP checksum
            ip.sum = dpkt.ip.IP.sum

            #get IP data
            ip.data = tcp

            #increment sequence number
            tcp.seq = dpkt.tcp.seq

            #specify header length
            ip.len = dpkt.ip.IP.hl

            #get ethernet data
            eth.data = ip

            #get src and dst ports
            #does not work yet
            src_port = tcp.sport
            dst_port = tcp.dport

            # if source port matches the target port
            # write the packets to a file
            # for some reason sees all ports as 59952
            # gets error if we use anything else
            try:
                if tcp.sport == tgt_port:
                    pcw.writepkt(eth,ts)
            except Exception as e:
                print e
                continue
        #close the file   
        pcw.close
        pass

    if __name__ == '__main__':
        main()