注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

老和山小和尚

敬天爱人

 
 
 

日志

 
 
 
 

Ragel for http example (Mongrel HTTP/1.1)  

2009-09-21 13:47:29|  分类: 协议研究 |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |
ragel对http协议解析的简单例子

ragel对协议的定义摘自mongrel-1.1.5

1.所需文件
ragel对http协议的一般定义(http11_parser_common.rl):

%%{
  
  machine http_parser_common;
 
#### HTTP PROTOCOL GRAMMAR
# Shared grammar for an HTTP request line followed by headers.
# The actions embedded below (mark, request_uri, request_method, fragment,
# http_version, request_path, start_query, query_string, start_field,
# write_field, start_value, write_value, done) are NOT defined in this
# machine; whichever machine includes it has to define them.
# Embedding operators used: ">" runs an action on entering a machine,
# "%" on leaving it, "@" on its finishing transition.

# line endings
  CRLF = "\r\n";
 
# character types
  CTL = (cntrl | 127);
  safe = ("$" | "-" | "_" | ".");
  extra = ("!" | "*" | "'" | "(" | ")" | ",");
  reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
  unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
# national: every character that belongs to none of the classes above
  national = any -- (alpha | digit | reserved | extra | safe | unsafe);
  unreserved = (alpha | digit | safe | extra | national);
# escape: a percent sign followed by exactly two hex digits
  escape = ("%" xdigit xdigit);
  uchar = (unreserved | escape);
  pchar = (uchar | ":" | "@" | "&" | "=" | "+");
  tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
 
# elements
# token: one ASCII character that is neither a control nor a tspecial
  token = (ascii -- (CTL | tspecials));
 
# URI schemes and absolute paths
# NOTE(review): scheme uses "*", so an empty scheme (a URI starting with ":")
# is accepted as written - confirm whether that is intended.
  scheme = ( alpha | digit | "+" | "-" | "." )* ;
  absolute_uri = (scheme ":" (uchar | reserved )*);
 
  path = ( pchar+ ( "/" pchar* )* ) ;
# query_string fires when the query part is left
  query = ( uchar | reserved )* %query_string ;
  param = ( pchar | "/" )* ;
  params = ( param ( ";" param )* ) ;
# request_path fires when the (optional) path is left; start_query fires when
# the "?" is left, i.e. at the first position of the query text
  rel_path = ( path? %request_path (";" params)? ) ("?" %start_query query)?;
  absolute_path = ( "/"+ rel_path );
 
# mark is taken at the first byte of each element below so that the matching
# leaving action can report the span from the mark to the current position
  Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
  Fragment = ( uchar | reserved )* >mark %fragment;
# Method: 1 to 20 characters drawn from upper-case letters, digits and safe
  Method = ( upper | digit | safe ){1,20} >mark %request_method;
 
  http_number = ( digit+ "." digit+ ) ;
  HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
# Request line: method, URI, optional "#fragment", version, CRLF,
# separated by single spaces
  Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " HTTP_Version CRLF ) ;
 
# header name: one or more token characters other than ":"
  field_name = ( token -- ":" )+ >start_field %write_field;
 
# header value: any bytes; where it stops is decided by message_header
  field_value = any* >start_value %write_value;
 
# ":>" (entry-guarded concatenation) ends field_value once CRLF can start;
# spaces after the colon are skipped before the value begins
  message_header = field_name ":" " "* field_value :> CRLF;
 
# A request is the request line, any number of headers, then an empty line;
# done fires on the last character of that terminating CRLF
  Request = Request_Line ( message_header )* ( CRLF @done );
 
main := Request;
 
}%%

ragel对http协议的解析过程定义(http11_parser.rl;我发现代码库经常改动,我这边的测试代码以本文为准):

#include "http11_parser.h"
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
 
 
/*
 * Offset helpers used by the parser actions. They expand to expressions that
 * reference `buffer` (start of the input) and `parser` (pointer to the parser
 * state), so both names must be in scope wherever the macros are used.
 */
/* Distance from the offset stored in parser->AT to the pointer FPC. */
#define LEN(AT, FPC) ((FPC) - buffer - parser->AT)
/* Store the position of pointer FPC in parser->M as an offset from buffer. */
#define MARK(M,FPC) (parser->M = (FPC) - buffer)
/* Convert the offset stored in parser->F back into a pointer into buffer. */
#define PTR_TO(F) (buffer + parser->F)
 
/** Machine **/
 
%%{
  
  machine http_parser;
 
  # Actions for the grammar pulled in by the include at the end of this block.
  # Positions are stored as offsets from buffer (see MARK / LEN / PTR_TO);
  # every callback is optional and is skipped when its pointer is NULL.

  # Remember the current position in parser->mark.
  action mark {MARK(mark, fpc); }
 
 
  # Remember where a header name starts.
  action start_field { MARK(field_start, fpc); }
  # NOTE(review): this action is not referenced by the grammar shown in this
  # article and snake_upcase_char is not defined in the code shown - confirm
  # whether it can be dropped.
  action snake_upcase_field { snake_upcase_char((char *)fpc); }
  # Header name finished: record its length, measured from field_start.
  action write_field {
    parser->field_len = LEN(field_start, fpc);
  }
 
  # Header value starts; parser->mark is reused for it.
  action start_value { MARK(mark, fpc); }
  # Header value finished: report name (field_start, field_len) and value
  # (mark up to the current position) through http_field.
  action write_value {
    if(parser->http_field != NULL) {
      parser->http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, fpc));
    }
  }
  # The next three actions report the span from parser->mark to the current
  # position through the callback of the same name.
  action request_method {
    if(parser->request_method != NULL)
      parser->request_method(parser->data, PTR_TO(mark), LEN(mark, fpc));
  }
  action request_uri {
    if(parser->request_uri != NULL)
      parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, fpc));
  }
  action fragment {
    if(parser->fragment != NULL)
      parser->fragment(parser->data, PTR_TO(mark), LEN(mark, fpc));
  }
 
  # The query string has its own start offset, separate from parser->mark.
  action start_query {MARK(query_start, fpc); }
  action query_string {
    if(parser->query_string != NULL)
      parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, fpc));
  }
 
  action http_version {
    if(parser->http_version != NULL)
      parser->http_version(parser->data, PTR_TO(mark), LEN(mark, fpc));
  }
 
  # Reports from parser->mark, i.e. the same start as the request URI.
  action request_path {
    if(parser->request_path != NULL)
      parser->request_path(parser->data, PTR_TO(mark), LEN(mark,fpc));
  }
 
  # End of headers: body_start is the offset of the byte after the current
  # one; header_done receives a pointer to that byte and the number of bytes
  # left before pe. fbreak then stops the machine.
  action done {
    parser->body_start = fpc - buffer + 1;
    if(parser->header_done != NULL)
      parser->header_done(parser->data, fpc + 1, pe - fpc - 1);
    fbreak;
  }
 
  include http_parser_common "http11_parser_common.rl";
 
}%%
 
/** Data **/
%% write data;
 
/**
 * Put a parser into the machine's start state and zero every offset and
 * counter it keeps.
 *
 * The callback pointers and `data` are not assigned here, so the caller is
 * responsible for setting them.
 *
 * @param parser  parser state to reset
 * @return always 1
 */
int http_parser_init(http_parser *parser) {
  int cs = 0;
  /* Ragel expands the next line into the code that sets cs to the start state. */
  %% write init;
  parser->cs = cs;
  parser->body_start = 0;
  parser->content_len = 0;
  parser->mark = 0;
  parser->nread = 0;
  parser->field_len = 0;
  parser->field_start = 0;
  /* Previously left indeterminate; cleared like the other offsets. */
  parser->query_start = 0;
 
  return(1);
}
 
 
/**
 * exec: run the state machine over buffer[off .. len).
 *
 * Resumes from the state saved in parser->cs, so a request can be fed in
 * several calls. Offsets kept in the parser (mark, field_start, ...) are
 * relative to `buffer`; the example in this article always passes the same
 * buffer start and lets `len` grow - presumably that is required, confirm
 * before using a different buffer per call.
 *
 * @param parser  parser state (initialised with http_parser_init)
 * @param buffer  start of the input
 * @param len     number of valid bytes in buffer
 * @param off     offset of the first byte to parse in this call (off <= len)
 * @return parser->nread after adding the bytes consumed by this call
 */
size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off) {
  /* p, pe and cs are the names the Ragel-generated code works on. */
  const char *p, *pe;
  int cs = parser->cs;
 
  assert(off <= len && "offset past end of buffer");
 
  p = buffer+off;
  pe = buffer+len;
 
  /* Disabled check: the input is not required to be NUL-terminated at pe. */
  /* assert(*pe == '\0' && "pointer does not end on NUL"); */
  assert(pe - p == len - off && "pointers aren't same distance");
 
  /* Ragel expands the next line into the generated state machine loop. */
  %% write exec;
 
  /* The new state is stored unless the parser was already in the error
     state when this call started. */
  if (!http_parser_has_error(parser))
    parser->cs = cs;
  /* Account for the bytes the machine advanced over in this call. */
  parser->nread += p - (buffer + off);
 
  /* Sanity checks on the resulting offsets (active only without NDEBUG).
     NOTE(review): mark < len and field_start < len cannot hold when len is 0. */
  assert(p <= pe && "buffer overflow after parsing execute");
  assert(parser->nread <= len && "nread longer than length");
  assert(parser->body_start <= len && "body starts after buffer end");
  assert(parser->mark < len && "mark is after buffer end");
  assert(parser->field_len <= len && "field has length longer than whole buffer");
  assert(parser->field_start < len && "field starts after buffer end");
 
  return(parser->nread);
}
 
/**
 * Summarise the parser's status as a single code.
 *
 * @return -1 if the parser is in the error state, 1 if it has reached a
 *         final state, 0 otherwise
 */
int http_parser_finish(http_parser *parser)
{
  /* The error check comes first, exactly as in the if/else chain it replaces. */
  if (http_parser_has_error(parser))
    return -1;

  return http_parser_is_finished(parser) ? 1 : 0;
}
 
/** Non-zero when the saved state equals the machine's error state. */
int http_parser_has_error(http_parser *parser) {
  const int state = parser->cs;

  return state == http_parser_error;
}
 
/** Non-zero when the saved state is at or beyond the first final state. */
int http_parser_is_finished(http_parser *parser) {
  const int state = parser->cs;

  return state >= http_parser_first_final;
}
 


头文件,包含结构体及函数声明:

#ifndef http11_parser_h
#define http11_parser_h
 
#include <sys/types.h>
 
#if defined(_WIN32)
#include <stddef.h>
#endif
 
/* Callback for a single parsed element: `at` points at `length` bytes of the
   input; the parser passes a pointer/length pair, not a NUL-terminated string. */
typedef void (*element_cb)(void *data, const char *at, size_t length);
/* Callback for one header: name (field, flen) and value (value, vlen), both
   given as pointer/length pairs into the input. */
typedef void (*field_cb)(void *data, const char *field, size_t flen, const char *value, size_t vlen);
 
/* Parser state plus the callbacks invoked while parsing. The size_t position
   fields hold offsets from the start of the buffer passed to
   http_parser_execute. */
typedef struct http_parser {
  int cs;              /* current state of the Ragel machine */
  size_t body_start;   /* offset of the first byte after the header block */
  int content_len;     /* zeroed by http_parser_init; not used elsewhere in the code shown */
  size_t nread;        /* total bytes consumed by http_parser_execute so far */
  size_t mark;         /* start of the element currently being scanned */
  size_t field_start;  /* start of the current header name */
  size_t field_len;    /* length of the current header name */
  size_t query_start;  /* start of the query string */
 
  void *data;          /* passed unchanged as the first argument of every callback */
 
  /* Optional callbacks; the parser skips any that is NULL. */
  field_cb http_field;
  element_cb request_method;
  element_cb request_uri;
  element_cb fragment;
  element_cb request_path;
  element_cb query_string;
  element_cb http_version;
  element_cb header_done;
  
} http_parser;
 
/* Reset the state and offsets; callbacks and data are left to the caller. Returns 1. */
int http_parser_init(http_parser *parser);
/* Returns -1 on error, 1 when parsing has finished, 0 otherwise. */
int http_parser_finish(http_parser *parser);
/* Parse data[off .. len) and return the total number of bytes consumed so far. */
size_t http_parser_execute(http_parser *parser, const char *data, size_t len, size_t off);
/* Non-zero when the parser is in the error state. */
int http_parser_has_error(http_parser *parser);
/* Non-zero when the parser has reached a final state. */
int http_parser_is_finished(http_parser *parser);
 
/* Total bytes consumed so far (expands to the nread member). */
#define http_parser_nread(parser) (parser)->nread
 
#endif

测试文件:
http11_server.c

#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "http11_parser.h"
#include <ctype.h>

#define BUFF_LEN 4096

/*
 * field_cb: print one header as HEADER: "<name>: <value>".
 *
 * Name and value arrive as pointer/length pairs, not C strings. They are
 * written straight from the input, so no combination of lengths can overrun
 * a local buffer. As with the earlier strncpy/strncat version, output of each
 * part stops at an embedded NUL byte.
 */
void http_field(void *data, const char *field, 
        size_t flen, const char *value, size_t vlen)
{
    const char *fnul = flen ? memchr(field, '\0', flen) : NULL;
    const char *vnul = vlen ? memchr(value, '\0', vlen) : NULL;
    size_t fshown = fnul ? (size_t)(fnul - field) : flen;
    size_t vshown = vnul ? (size_t)(vnul - value) : vlen;

    (void)data; /* unused by this demo callback */

    fputs("HEADER: \"", stdout);
    fwrite(field, 1, fshown, stdout);
    fputs(": ", stdout);
    fwrite(value, 1, vshown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the request method as METHOD: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void request_method(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("METHOD: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the request URI as URI: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void request_uri(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("URI: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the URI fragment as FRAGMENT: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void fragment(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("FRAGMENT: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the request path as PATH: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void request_path(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("PATH: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the query string as QUERY: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void query_string(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("QUERY: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb: print the protocol version as VERSION: "<text>".
 *
 * `at` is a pointer/length pair, not a C string. The bytes are written
 * straight from the input, so any `length` is safe; output stops early at an
 * embedded NUL byte, as it did with the strncpy-based version.
 */
void http_version(void *data, const char *at, size_t length)
{
    const char *nul = length ? memchr(at, '\0', length) : NULL;
    size_t shown = nul ? (size_t)(nul - at) : length;

    (void)data; /* unused by this demo callback */

    fputs("VERSION: \"", stdout);
    fwrite(at, 1, shown, stdout);
    fputs("\"\n", stdout);
}

/*
 * element_cb invoked once the header block is complete; it only announces
 * that fact on stdout and ignores all three arguments.
 */
void header_done(void *data, const char *at, size_t length)
{
    /* puts appends the newline itself, so the emitted bytes are "done.\n". */
    puts("done.");
}

/*
 * Prepare a parser for the demo: install the printing callbacks, clear the
 * user-data pointer and reset the parsing state.
 */
void parser_init(http_parser *hp) 
{
  /* The callbacks ignore data, but the parser still passes it to each of
     them, so it must not be left indeterminate. */
  hp->data = NULL;

  hp->http_field = http_field;
  hp->request_method = request_method;
  hp->request_uri = request_uri;
  hp->fragment = fragment;
  hp->request_path = request_path;
  hp->query_string = query_string;
  hp->http_version = http_version;
  hp->header_done = header_done;

  /* Resets state and offsets only; the assignments above are kept. */
  http_parser_init(hp);
}

int main ()
{
    char *data = "GET / HTTP/1.0\r\n" 
        "User-Agent: Wget/1.11.4\r\n" 
        "Accept: */*\r\n"
        "Host: www.163.com\r\n"
        "Connection: Keep-Alive\r\n"
        "\r\n";
    size_t dlen;
    http_parser parser, *hp;

    hp = &parser;
    dlen = strlen(data);

    parser_init(hp);

    http_parser_execute(hp, data, dlen, 0);

    return 0;
}

2. 生成c语言文件及编译:
#可能编译的时候会出现write eof的错误,好像新版本的ragel已经没有这个命令了,你可以把文件中出现write eof命令都删掉。
ragel -G2 http11_parser.rl
gcc -g -Wall http11_server.c http11_parser.c -o server

3.测试:
./server
METHOD: "GET"
PATH: "/"
URI: "/"
VERSION: "HTTP/1.0"
HEADER: "User-Agent: Wget/1.11.4"
HEADER: "Accept: */*"
HEADER: "Host: www.163.com"
HEADER: "Connection: Keep-Alive"
done.

4.异步的测试例子:
在一般的服务器的IO中,现在很多时候是非阻塞方式的,所以HTTP头部不一定能一次读取完全,所以可能需要多次调用http_parser_execute函数,这时上面的例子就变成了:
int main ()
{
    char data[4096] = "GET / HTTP/1.0\r\n"
        "User-Agent: Wget/1.11.4\r\n"
        "Accept: */*\r\n"
        "Host: www.163.com\r\n"
        "Connection: Keep-Alive\r\n"
        "\r\n";
    size_t dlen, dlen1;
    http_parser parser, *hp;
    int i;

    hp = &parser;
    dlen = strlen(data);

    for (i = 1; i < dlen; i++) {
        parser_init(hp);
        dlen1 = http_parser_execute(hp, data, i, 0);
        dlen1 = http_parser_execute(hp, data, dlen, dlen1);
    }

    return 0;
}
我测试了从第1个字节到结尾所有可能出现中断的位置,编译运行后发现原有的函数可以支持异步读入(http11_parser.rl中要求每次读入的数据结尾一定是\0的那句assert可能需要去掉,不然会报错)。
  评论这张
 
阅读(2648)| 评论(3)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017