#ifndef _BaseHtmlParser_H_
#define _BaseHtmlParser_H_
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/**
 * Base class of an HTML parser that exposes overridable event hooks.
 *
 * The Parse() overloads accept a page URL together with its content. The
 * On*() virtual hooks describe the events a derived class can react to
 * (start tag, end tag, text, comment/script); their default bodies are empty.
 *
 * NOTE(review): Parse() is implemented outside this header; presumably it
 * invokes the On*() hooks while scanning the content - confirm against the
 * implementation file.
 */
class CBaseHtmlParser
{
public:
    /**
     * A string described by a pointer and a size.
     *
     * NOTE(review): nothing in this header shows who owns the buffer or
     * whether it is null-terminated; treat it as a non-owning view and rely
     * on cbData for the length - confirm with the implementation.
     */
    struct SZ_STRING
    {
        const char* pbData;   //!< Pointer to the first character of the data.
        std::size_t cbData;   //!< Size of the data pointed to by pbData.
    };

public:
    CBaseHtmlParser();
    virtual ~CBaseHtmlParser();

    /// Initialization hook; does nothing in the base class.
    virtual void Initialize() {}

    /// Tear-down hook; does nothing in the base class.
    virtual void Destroy() {}

    /**
     * Parses an HTML page given as std::string values.
     *
     * @param URL     URL of the page.
     * @param Content Content of the page.
     * @return bool result; its exact meaning is defined by the implementation
     *         (presumably true on success - confirm).
     */
    virtual bool Parse(const std::string& URL, const std::string& Content);

    /**
     * Parses an HTML page given as pointer/size strings.
     *
     * @param strUrl     URL of the page.
     * @param strContent Content of the page.
     * @return bool result; its exact meaning is defined by the implementation
     *         (presumably true on success - confirm).
     */
    virtual bool Parse(const SZ_STRING& strUrl, const SZ_STRING& strContent);

    /**
     * Utility that turns a URL into an absolute URL based on the current page.
     *
     * @param URL In/out: the URL to convert; taken by non-const reference, so
     *            the result is presumably written back into it - confirm.
     */
    void Relativity2AbsoluteURL(std::string& URL);

    /**
     * Event raised when a tag begins. For example, for '<a href=..' the
     * strTagName is 'a' and Attribs contains 'href'.
     *
     * @param strTagName Name of the tag.
     * @param Attribs    Attributes of the tag as pairs (presumably
     *                   name/value - confirm). Passed by value, so the vector
     *                   is copied on every call; the signature is kept as-is
     *                   because changing it would silently stop existing
     *                   overrides in derived classes from overriding.
     */
    virtual void OnStartTag(const SZ_STRING& strTagName,
                            std::vector<std::pair<SZ_STRING, SZ_STRING> > Attribs) {}

    /**
     * Event raised when a tag closes. For example, for '</a>' the strTagName
     * is 'a'.
     *
     * @param strTagName Name of the tag being closed.
     */
    virtual void OnEndTag(const SZ_STRING& strTagName) {}

    /**
     * Event raised for text between tags. For example, for '<>hello<>' the
     * strData is 'hello'.
     *
     * @param strData The text found between tags.
     */
    virtual void OnData(const SZ_STRING& strData) {}

    /**
     * Event raised for a script or a comment, such as '<!-- ... -->' or
     * '<script ..> </script>'.
     *
     * @param strComment The script or comment text.
     */
    virtual void OnComment(const SZ_STRING& strComment) {}

private:
    // Fixed 1024-byte buffers. NOTE(review): judging by the names they hold
    // the base URL and base domain of the page being parsed (likely used by
    // Relativity2AbsoluteURL) - confirm in the implementation, and check that
    // writes into them are bounds-checked.
    char m_szBaseURL[1024];
    char m_szBaseDomain[1024];
};
#endif
// (Web-page residue, originally "Loading, please wait......"; not part of this header.)