Changeset 110

Show
Ignore:
Timestamp:
02/06/2007 00:37:16 (22 months ago)
Author:
why
Message:
  • ext/hpricot_scan/hpricot_common.rl: hurray, the grammar shared between Java and C is right here.
Location:
trunk/ext/hpricot_scan
Files:
1 added
2 modified

Legend:

Unmodified
Added
Removed
  • trunk/ext/hpricot_scan/hpricot_scan.java.rl

    r109 r110  
    181 181  } 
    182 182 
    183   # 
    184   # HTML tokens 
    185   # (a blatant rip from HTree) 
    186   # 
    187   newline = '\n' @{curline += 1;} ; 
    188 # qtext = '"' ( '\"' | [^\n"] )* '"' | "'" ( "\\'" | [^\n'] )* "'" ;  
    189   NameChar = [\-A-Za-z0-9._:?] ; 
    190   Name = [A-Za-z_:] NameChar* ; 
    191   StartComment = "<!--" ; 
    192   EndComment = "-->" ; 
    193   StartCdata = "<![CDATA[" ; 
    194   EndCdata = "]]>" ; 
    195  
    196   NameCap = Name >_tag %tag; 
    197   NameAttr = NameChar+ >_akey %akey ; 
    198   Q1Attr = [^']* >_aval %aval ; 
    199   Q2Attr = [^"]* >_aval %aval ; 
    200   UnqAttr = ( space >_aval | [^ \t\n<>"'] >_aval [^ \t\n<>]* %aunq ) ; 
    201   Nmtoken = NameChar+ >_akey %akey ; 
    202  
    203   Attr =  NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; 
    204   AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ; 
    205   AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ; 
    206   StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">"; 
    207   EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ; 
    208  
    209   EndTag = "</" NameCap space* ">" ; 
    210   XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ; 
    211   XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ; 
    212   XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ; 
    213   XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ; 
    214   XmlYesNo = ("yes" | "no") >_aval %xmlsd ; 
    215   XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ; 
    216   XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?>" ; 
    217  
    218   SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ; 
    219   PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*  >_aval %pubid '"' | 
    220     "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ; 
    221   ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ; 
    222   DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ; 
    223   StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ; 
    224   EndXmlProcIns = "?>" ; 
    225  
    226   html_comment := |* 
    227     EndComment @{ EBLK(comment, 3); fgoto main; }; 
    228     any | newline { TEXT_PASS(); }; 
    229   *|; 
    230  
    231   html_cdata := |* 
    232     EndCdata @{ EBLK(cdata, 3); fgoto main; }; 
    233     any | newline { TEXT_PASS(); }; 
    234   *|; 
    235  
    236   html_procins := |* 
    237     EndXmlProcIns @{ EBLK(procins, 2); fgoto main; }; 
    238     any | newline { TEXT_PASS(); }; 
    239   *|; 
    240  
    241   main := |* 
    242     XmlDecl >newEle { ELE(xmldecl); }; 
    243     DocType >newEle { ELE(doctype); }; 
    244     StartXmlProcIns >newEle { fgoto html_procins; }; 
    245     StartTag >newEle { ELE(stag); }; 
    246     EndTag >newEle { ELE(etag); }; 
    247     EmptyTag >newEle { ELE(emptytag); }; 
    248     StartComment >newEle { fgoto html_comment; }; 
    249     StartCdata >newEle { fgoto html_cdata; }; 
    250     any | newline { TEXT_PASS(); }; 
    251   *|; 
     183  include hpricot_common "ext/hpricot_scan/hpricot_common.rl"; 
     184 
    252 185 }%% 
    253 186 
  • trunk/ext/hpricot_scan/hpricot_scan.rl

    r101 r110  
    103 103  } 
    104 104 
    105   # 
    106   # HTML tokens 
    107   # (a blatant rip from HTree) 
    108   # 
    109   newline = '\n' @{curline += 1;} ; 
    110 # qtext = '"' ( '\"' | [^\n"] )* '"' | "'" ( "\\'" | [^\n'] )* "'" ;  
    111   NameChar = [\-A-Za-z0-9._:?] ; 
    112   Name = [A-Za-z_:] NameChar* ; 
    113   StartComment = "<!--" ; 
    114   EndComment = "-->" ; 
    115   StartCdata = "<![CDATA[" ; 
    116   EndCdata = "]]>" ; 
    117  
    118   NameCap = Name >_tag %tag; 
    119   NameAttr = NameChar+ >_akey %akey ; 
    120   Q1Attr = [^']* >_aval %aval ; 
    121   Q2Attr = [^"]* >_aval %aval ; 
    122   UnqAttr = ( space >_aval | [^ \t\n<>"'] >_aval [^ \t\n<>]* %aunq ) ; 
    123   Nmtoken = NameChar+ >_akey %akey ; 
    124  
    125   Attr =  NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; 
    126   AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ; 
    127   AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ; 
    128   StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">"; 
    129   EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ; 
    130  
    131   EndTag = "</" NameCap space* ">" ; 
    132   XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ; 
    133   XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ; 
    134   XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ; 
    135   XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ; 
    136   XmlYesNo = ("yes" | "no") >_aval %xmlsd ; 
    137   XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ; 
    138   XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?>" ; 
    139  
    140   SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ; 
    141   PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*  >_aval %pubid '"' | 
    142     "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ; 
    143   ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ; 
    144   DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ; 
    145   StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ; 
    146   EndXmlProcIns = "?>" ; 
    147  
    148   html_comment := |* 
    149     EndComment @{ EBLK(comment, 3); fgoto main; }; 
    150     any | newline { TEXT_PASS(); }; 
    151   *|; 
    152  
    153   html_cdata := |* 
    154     EndCdata @{ EBLK(cdata, 3); fgoto main; }; 
    155     any | newline { TEXT_PASS(); }; 
    156   *|; 
    157  
    158   html_procins := |* 
    159     EndXmlProcIns @{ EBLK(procins, 2); fgoto main; }; 
    160     any | newline { TEXT_PASS(); }; 
    161   *|; 
    162  
    163   main := |* 
    164     XmlDecl >newEle { ELE(xmldecl); }; 
    165     DocType >newEle { ELE(doctype); }; 
    166     StartXmlProcIns >newEle { fgoto html_procins; }; 
    167     StartTag >newEle { ELE(stag); }; 
    168     EndTag >newEle { ELE(etag); }; 
    169     EmptyTag >newEle { ELE(emptytag); }; 
    170     StartComment >newEle { fgoto html_comment; }; 
    171     StartCdata >newEle { fgoto html_cdata; }; 
    172     any | newline { TEXT_PASS(); }; 
    173   *|; 
     105  include hpricot_common "ext/hpricot_scan/hpricot_common.rl"; 
     106 
    174 107 }%% 
    175 108