加载中…
个人资料
  • 博客等级:
  • 博客积分:
  • 博客访问:
  • 关注人气:
  • 获赠金笔:0支
  • 赠出金笔:0支
  • 荣誉徽章:
正文 字体大小:

python解析pcap文件(五元组以及流分类)

(2013-11-01 16:52:56)
标签:

python

五元组

flow

pcap

open

分类: python学习

----------------------------------operations.readpcapfile.py--------------------------------------

'''
其中
decode_PcapFileHeader(B_datastring)是解析pcap文件头的,得不到什么可用的
decode_PcapDataPcaket(B_datastring)是解析每一包数据的,主要获得数据包数据
rdpcap(fileName)是读取文件的,它的返回值是一个列表,
列表的长度等于数据包的总数,里面每一个元素是一个(包头,包数据)二元组:
[(第一包头,第一包数据),(第二包头,第二包数据)..(第n包头,第n包数据)]

最后的show_data(array,count)配合str_to_hex(strs)是为了实现数据包的16进制显示。
'''

 

# !/usr/bin/python

#coding=utf-8


import struct

# Not used: rdpcap() never calls this function; it only slices the file header into raw fields.
def decode_PcapFileHeader(B_datastring):
    """Slice the leading 24 bytes of a pcap file into its raw header fields.

            4 bytes       2 bytes     2 bytes     4 bytes    4 bytes   4 bytes   4 bytes
            ------------------------------------------------------------------------------
    Header  | magic_num | ver_major | ver_minor | thiszone | sigfigs | snaplen | linktype|
            ------------------------------------------------------------------------------

    Args:
        B_datastring: the file contents (any sliceable byte sequence).

    Returns:
        A dict mapping each field name to the undecoded slice that holds it.
        Nothing is converted to integers, and input shorter than 24 bytes
        simply yields shortened or empty slices.
    """
    # (field name, width in bytes), in on-disk order.
    layout = (
        ('magic_number', 4),
        ('version_major', 2),
        ('version_minor', 2),
        ('thiszone', 4),
        ('sigfigs', 4),
        ('snaplen', 4),
        ('linktype', 4),
    )

    fields = {}
    start = 0
    for field_name, width in layout:
        fields[field_name] = B_datastring[start:start + width]
        start += width

    return fields

 
 
def decode_PcapDataPcaket(B_datastring):
    """Split the contents of a pcap file into its packet records.

              4 bytes    4 bytes    4 bytes 4 bytes
            ----------------------------------------------
    Packet  | GMTtime | MicroTime | CapLen | Len |  Data |
            ----------------------------------------------
            |------------Packet Header-----------|

    Args:
        B_datastring: the whole file contents, including the 24-byte file
            header that precedes the first record.

    Returns:
        A list with one ``(header, data)`` tuple per packet, in file order.
        ``header`` is a dict holding the four undecoded 4-byte fields
        ``'GMTtime'``, ``'MicroTime'``, ``'CapLen'`` and ``'Len'``; ``data``
        is the captured packet bytes.

    Parsing stops quietly at a trailing record header that is cut short.
    A final packet whose data is cut short is returned with the bytes that
    are present.
    """
    file_header_len = 24
    record_header_len = 16

    # The magic number tells which byte order the writer used, so CapLen is
    # decoded correctly regardless of the machine reading the file.  Both
    # the microsecond and the nanosecond magic values are recognised.
    magic = B_datastring[0:4]
    if magic in (b'\xa1\xb2\xc3\xd4', b'\xa1\xb2\x3c\x4d'):
        caplen_format = '>I'
    elif magic in (b'\xd4\xc3\xb2\xa1', b'\x4d\x3c\xb2\xa1'):
        caplen_format = '<I'
    else:
        # Unknown magic: keep decoding in the reader's native byte order.
        caplen_format = '=I'

    packets = []
    total_len = len(B_datastring)
    offset = file_header_len

    # Require a complete record header so struct.unpack never sees a
    # short slice at the end of a truncated capture.
    while offset + record_header_len <= total_len:
        # A fresh dict per packet; reusing one dict would make every
        # returned header alias the last packet's values.
        header = {
            'GMTtime': B_datastring[offset:offset + 4],
            'MicroTime': B_datastring[offset + 4:offset + 8],
            'CapLen': B_datastring[offset + 8:offset + 12],
            'Len': B_datastring[offset + 12:offset + 16],
        }

        # CapLen is the number of packet bytes actually stored in the file.
        captured_len = struct.unpack(caplen_format, header['CapLen'])[0]
        data_start = offset + record_header_len
        data = B_datastring[data_start:data_start + captured_len]

        packets.append((header, data))
        offset = data_start + captured_len

    return packets

  

def rdpcap(fileName):
    """Read a pcap file and return its packets.

    Args:
        fileName: path of the pcap file to read.

    Returns:
        The list produced by decode_PcapDataPcaket() for the file contents.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(fileName, 'rb') as pcap_file:
        raw_bytes = pcap_file.read()

    return decode_PcapDataPcaket(raw_bytes)
 

# Convert bytes such as b'\x01\x0e\xb0' into the hex string '0x010eb0'.

def str_to_hex(strs):
    """Render a sequence of byte values as one hex string prefixed with '0x'.

    Each value contributes its hex digits without a per-value prefix, and
    values below 0x10 are left-padded to two digits, so the bytes 01 0e b0
    become '0x010eb0'.  An empty sequence yields just '0x'.

    Args:
        strs: an iterable of integers, typically a ``bytes`` object.

    Returns:
        The concatenated hex digits as a ``str``.
    """
    digit_pairs = [
        hex(value).replace('0x', '').rjust(2, '0')
        for value in strs
    ]
    return '0x' + ''.join(digit_pairs)

 

# Display the first `count` packets as hex strings.

def show_data(array,count):
    """Print the first ``count`` packets as hex strings.

    For each packet the 1-based packet number is printed on one line and
    the str_to_hex() rendering of its data on the next.

    Args:
        array: sequence of ``(header, data)`` pairs; only ``data`` is used.
        count: maximum number of packets to print.  Zero or a negative
            value prints nothing.
    """
    for index, packet in enumerate(array):
        # Test the limit before printing so that count <= 0 shows nothing.
        if index >= count:
            break

        print(index + 1)
        print(str_to_hex(packet[1]))
  
  
def show_five_tuple(array,count):
    """Print protocol, addresses and ports of the first ``count`` packets.

    The fields are read at fixed positions of the str_to_hex() string
    (two characters for the '0x' prefix, then two hex digits per byte):
    protocol at byte 23, source address at bytes 26-29, destination address
    at bytes 30-33, source port at bytes 34-35 and destination port at
    bytes 36-37.  Addresses and ports are printed both as hex and decimal.

    NOTE(review): these positions match an untagged Ethernet frame carrying
    an IPv4 header without options; other layouts are not detected and
    print meaningless values - confirm the capture's link type.

    Args:
        array: sequence of ``(header, data)`` pairs; only ``data`` is used.
        count: maximum number of packets to print.  Zero or a negative
            value prints nothing.
    """
    # Shortest hex string that still contains the destination port.
    min_hex_len = 78

    for index, packet in enumerate(array):
        # Test the limit before printing so that count <= 0 shows nothing.
        if index >= count:
            break

        display_data = str_to_hex(packet[1])
        print(index + 1)

        # Short packets cannot hold all five fields; report them instead
        # of failing while indexing past the end of the string.
        if len(display_data) < min_hex_len:
            print("Packet too short for a five-tuple")
            print()
            continue

        protocol = display_data[48:50]
        source_ip = display_data[54:62]
        destination_ip = display_data[62:70]
        source_port = display_data[70:74]
        destination_port = display_data[74:78]

        print("Protocol type: \t", protocol)

        print("SourceIP: \t", source_ip, end="")
        sip = [int(source_ip[pos:pos + 2], 16) for pos in range(0, 8, 2)]
        print("(", sip[0], ":", sip[1], ":", sip[2], ":", sip[3], ")")

        print("DestinationIP: \t", destination_ip, end="")
        dip = [int(destination_ip[pos:pos + 2], 16) for pos in range(0, 8, 2)]
        print("(", dip[0], ":", dip[1], ":", dip[2], ":", dip[3], ")")

        print("SourcePort: \t", source_port, end="")
        print("(", int(source_port, 16), ")")

        print("DestinationPort: ", destination_port, end="")
        print("(", int(destination_port, 16), ")")

        print()
   
   
   
def classifyByft(array,count):
    """Group the first ``count`` packets into flows by their five-tuple.

    The flow key of a packet is the concatenation of five slices of its
    str_to_hex() string: protocol, source address, destination address,
    source port and destination port.  Source and destination are not
    interchangeable, so the two directions of a conversation form two
    separate flows.

    Output, in order:
      1. for every examined packet, its 1-based number and its flow key;
      2. the set of distinct flow keys;
      3. a blank line;
      4. one list of packet numbers per flow, flows ordered by the first
         packet in which they appear.

    Args:
        array: sequence of ``(header, data)`` pairs; only ``data`` is used.
        count: maximum number of packets to examine.  Zero or a negative
            value examines none.
    """
    # Maps flow key -> packet numbers.  A single pass replaces rescanning
    # the whole capture once per flow, and dict insertion order keeps the
    # final listing deterministic.
    flows = {}

    for index, packet in enumerate(array):
        # Test the limit before processing so that count <= 0 reads nothing.
        if index >= count:
            break

        display_data = str_to_hex(packet[1])
        flow_key = (
            display_data[48:50]      # protocol
            + display_data[54:62]    # source address
            + display_data[62:70]    # destination address
            + display_data[70:74]    # source port
            + display_data[74:78]    # destination port
        )

        print(index + 1)
        print(flow_key)

        flows.setdefault(flow_key, []).append(index + 1)

    # The distinct five-tuples, printed as a set.
    print(set(flows))

    print()

    for packet_numbers in flows.values():
        print(packet_numbers)
       
       


       

 

 

------------------------------------------main.py----------------------------------------

#!/usr/bin/python
from operations.readpcapfile import *

 

# Capture file to analyse.  Other captures tried earlier:
# 'c:\\Python25\\qq.pcap' and mail.pcap.
packet_data = rdpcap('c:\\traffic.pcap')
packet_total = len(packet_data)

# Alternative report: show_five_tuple(packet_data, packet_total)
# Alternative limit:  classifyByft(packet_data, 100)
classifyByft(packet_data, count=packet_total)


       

0

阅读 收藏 喜欢 打印举报/Report
  

新浪BLOG意见反馈留言板 欢迎批评指正

新浪简介 | About Sina | 广告服务 | 联系我们 | 招聘信息 | 网站律师 | SINA English | 产品答疑

新浪公司 版权所有