larbin源码之global.h

 1 /** This represents a connection: we have a fixed number of them.
 2 * fetchOpen links them with servers
 3 * fetchPipe reads those which are linked
 4 */
 5 // Holds the information describing one connection
 6 struct Connexion {
 7     char state;      // socket state: EMPTY, CONNECTING, WRITE, OPEN
 8     int pos;         // position marker for sending the request
 9     FetchError err;  // how the connection terminated (an enum)
10     int socket;      // socket descriptor number
11     int timeout;  // connection timeout setting
12     LarbinString request;  // HTTP request header
13     file *parser;    // parser for this connection (a robots.txt or an html file)
14     char buffer[maxPageSize];// downloaded page data
15     Connexion();// per the original note: initializes state=emptyC, parser=NULL (body not shown here)
16     ~Connexion();// per the original note: never meant to run; hits assert(false) if it does
17     void recycle();// per the original note: frees *parser and re-initializes request
18 };

该结构体主要有两个类类型的成员:LarbinString 和 file。

LarbinString类(在string.h中声明,在string.cc中实现),主要封装对HTTP报头字符串的各种操作。

/**
 * String helper used for HTTP header text.
 *
 * Only the declarations are visible in this excerpt; the per-member notes
 * below restate the original annotations and should be confirmed against
 * the implementation (string.cc according to the surrounding text).
 */
class LarbinString {
private:
    char *chaine;    // HTTP header string storage
    uint pos;        // current position marker within the string
    uint size;       // size of the string
public:
    // Initializes chaine, pos = 0 and size.
    LarbinString(uint size = STRING_SIZE);
    // Releases chaine.
    ~LarbinString();
    // Re-allocates chaine.
    void recycle(uint size = STRING_SIZE);
    // Returns chaine itself.
    char *getString();
    // Returns a copy of chaine.
    char *giveString();
    // Stores c at chaine[pos].
    void addChar(char c);
    // Appends s to chaine, starting at pos.
    void addString(char *s);
    // Appends len characters of s to chaine, starting at pos.
    void addBuffer(char *s, uint len);
    // Current length, i.e. the value of pos.
    inline uint getLength() { return pos; }
    // Subscript operator overload. The body is not part of this excerpt, so
    // the declaration is terminated here to keep the class well-formed.
    inline char operator [] (uint i);
    // Stores c at chaine[i].
    void setChar(uint i, char c);
};

file类是基类,html类和robots类都继承自它,用于解析连接下载到的内容(robots.txt 或 .html 文件)。

// Abstract base for parsing what a connection downloads. Both input hooks
// are pure virtual, so subclasses must provide them; the surrounding notes
// name html and robots as the subclasses.
class file {
protected:
    char *buffer;    // per the original note: the Connexion's buffer[maxPageSize (100000)], i.e. the downloaded page data
    char *posParse;    // current parse position
public:
    file(Connexion *conn);    // per the original note: sets buffer and posParse to conn->buffer, and pos = 0 (body not shown here)
    virtual ~file();
    bool isRobots;    // Is it a robots.txt
    uint pos;    // current position within buffer
    // a string arrives from the server
    virtual int inputHeaders(int size) = 0; // just parse headers
    virtual int endInput() = 0;
};

class html : public file {
private:
    url *here;    //url地址
    char *area;    //当前感兴趣区的起始位置
    char *contentStart;    //真正内容的起始位置,报头之后的内容
    url *base;    //url基地址
    /* manage a new url : verify and send it */
    void manageUrl(url *nouv, bool isRedir);