抓 HTTP 包的时候发现,很多网站的响应报文体是经过 gzip 压缩的数据,存放在一个或多个 chunk 里面(参见 HTTP 响应报文中的 chunked 传输编码)。这些 gzip 数据无法直接阅读,需要先解压。一开始在网上找到了一份可以正常运行的代码,贴出来:
http://hi.baidu.com/xzq2000/blog/item/c5429f2fd6a646301f308991
char* ungzip(char* source,int len)
{
int err;
z_stream d_stream;
Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
memcpy(compr,(Byte*)source,len);
uLong comprLen, uncomprLen;
comprLen = sizeof(compr) / sizeof(compr[0]);
uncomprLen = 4*comprLen;
strcpy((char*)uncompr, "garbage");
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = compr;
d_stream.avail_in = 0;
d_stream.next_out = uncompr;
err = inflateInit2(&d_stream,47);
if(err!=Z_OK)
{
}
while (d_stream.total_out < uncomprLen
&& d_stream.total_in
< comprLen) {
d_stream.avail_in = d_stream.avail_out = 1;
err = inflate(&d_stream,Z_NO_FLUSH);
}
err = inflateEnd(&d_stream);
if(err!=Z_OK)
{
}
char* b = new char[d_stream.total_out+1];
memset(b,0,d_stream.total_out+1);
memcpy(b,(char*)uncompr,d_stream.total_out);
return b;
}
后来看了 zlib usage example(参见 zlib 使用范例),模仿着写了一段代码,可以正常运行,而且应该比上面的代码快:上面的代码把 z_stream 的 avail_in 和 avail_out 都设为 1,每次调用 inflate 只能处理一个字节,非常慢。
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <zlib.h>
/* Base size, in bytes, used to dimension the buffers in ungzip(). */
#define segment_size 1460//largest tcp data segment (per the original author's note)
int ungzip(char* source,int len,char*des)
{
int ret,have;
int offset=0;
z_stream d_stream;
Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
memcpy(compr,(Byte*)source,len);
uLong comprLen, uncomprLen;
comprLen
=len;//一开始写成了comprlen=sizeof(compr)以及comprlen=strlen(compr),后来发现都不对。
//sizeof(compr)永远都是segment_size,显然不对,strlen(compr)也是不对的,因为strlen只算到\0之前,
//但是gzip或者zlib数据里\0很多。
uncomprLen = segment_size*4;
strcpy((char*)uncompr, "garbage");
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.next_in =
Z_NULL;//inflateInit和inflateInit2都必须初始化next_in和avail_in
d_stream.avail_in = 0;//deflateInit和deflateInit2则不用
ret = inflateInit2(&d_stream,47);
if(ret!=Z_OK)
{
}
d_stream.next_in=compr;
d_stream.avail_in=comprLen;
do
{
offset+=have;
}while(d_stream.avail_out==0);
inflateEnd(&d_stream);
memcpy(des+offset,"\0",1);
return ret;
}


加载中…