Changeset 110
- Timestamp: 02/06/2007 00:37:16 (22 months ago)
- Location: trunk/ext/hpricot_scan
- Files: 1 added, 2 modified
  - hpricot_common.rl (added)
  - hpricot_scan.java.rl (modified) (1 diff)
  - hpricot_scan.rl (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/ext/hpricot_scan/hpricot_scan.java.rl
r109 r110 181 181 } 182 182 183 # 184 # HTML tokens 185 # (a blatant rip from HTree) 186 # 187 newline = '\n' @{curline += 1;} ; 188 # qtext = '"' ( '\"' | [^\n"] )* '"' | "'" ( "\\'" | [^\n'] )* "'" ; 189 NameChar = [\-A-Za-z0-9._:?] ; 190 Name = [A-Za-z_:] NameChar* ; 191 StartComment = "<!--" ; 192 EndComment = "-->" ; 193 StartCdata = "<![CDATA[" ; 194 EndCdata = "]]>" ; 195 196 NameCap = Name >_tag %tag; 197 NameAttr = NameChar+ >_akey %akey ; 198 Q1Attr = [^']* >_aval %aval ; 199 Q2Attr = [^"]* >_aval %aval ; 200 UnqAttr = ( space >_aval | [^ \t\n<>"'] >_aval [^ \t\n<>]* %aunq ) ; 201 Nmtoken = NameChar+ >_akey %akey ; 202 203 Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; 204 AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ; 205 AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ; 206 StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">"; 207 EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ; 208 209 EndTag = "</" NameCap space* ">" ; 210 XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ; 211 XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ; 212 XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ; 213 XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ; 214 XmlYesNo = ("yes" | "no") >_aval %xmlsd ; 215 XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ; 216 XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? 
space* "?>" ; 217 218 SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ; 219 PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"' | 220 "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ; 221 ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ; 222 DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ; 223 StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ; 224 EndXmlProcIns = "?>" ; 225 226 html_comment := |* 227 EndComment @{ EBLK(comment, 3); fgoto main; }; 228 any | newline { TEXT_PASS(); }; 229 *|; 230 231 html_cdata := |* 232 EndCdata @{ EBLK(cdata, 3); fgoto main; }; 233 any | newline { TEXT_PASS(); }; 234 *|; 235 236 html_procins := |* 237 EndXmlProcIns @{ EBLK(procins, 2); fgoto main; }; 238 any | newline { TEXT_PASS(); }; 239 *|; 240 241 main := |* 242 XmlDecl >newEle { ELE(xmldecl); }; 243 DocType >newEle { ELE(doctype); }; 244 StartXmlProcIns >newEle { fgoto html_procins; }; 245 StartTag >newEle { ELE(stag); }; 246 EndTag >newEle { ELE(etag); }; 247 EmptyTag >newEle { ELE(emptytag); }; 248 StartComment >newEle { fgoto html_comment; }; 249 StartCdata >newEle { fgoto html_cdata; }; 250 any | newline { TEXT_PASS(); }; 251 *|; 183 include hpricot_common "ext/hpricot_scan/hpricot_common.rl"; 184 252 185 }%% 253 186 -
trunk/ext/hpricot_scan/hpricot_scan.rl
r101 r110 103 103 } 104 104 105 # 106 # HTML tokens 107 # (a blatant rip from HTree) 108 # 109 newline = '\n' @{curline += 1;} ; 110 # qtext = '"' ( '\"' | [^\n"] )* '"' | "'" ( "\\'" | [^\n'] )* "'" ; 111 NameChar = [\-A-Za-z0-9._:?] ; 112 Name = [A-Za-z_:] NameChar* ; 113 StartComment = "<!--" ; 114 EndComment = "-->" ; 115 StartCdata = "<![CDATA[" ; 116 EndCdata = "]]>" ; 117 118 NameCap = Name >_tag %tag; 119 NameAttr = NameChar+ >_akey %akey ; 120 Q1Attr = [^']* >_aval %aval ; 121 Q2Attr = [^"]* >_aval %aval ; 122 UnqAttr = ( space >_aval | [^ \t\n<>"'] >_aval [^ \t\n<>]* %aunq ) ; 123 Nmtoken = NameChar+ >_akey %akey ; 124 125 Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; 126 AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ; 127 AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ; 128 StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">"; 129 EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ; 130 131 EndTag = "</" NameCap space* ">" ; 132 XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ; 133 XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ; 134 XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ; 135 XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ; 136 XmlYesNo = ("yes" | "no") >_aval %xmlsd ; 137 XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ; 138 XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? 
space* "?>" ; 139 140 SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ; 141 PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"' | 142 "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ; 143 ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ; 144 DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ; 145 StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ; 146 EndXmlProcIns = "?>" ; 147 148 html_comment := |* 149 EndComment @{ EBLK(comment, 3); fgoto main; }; 150 any | newline { TEXT_PASS(); }; 151 *|; 152 153 html_cdata := |* 154 EndCdata @{ EBLK(cdata, 3); fgoto main; }; 155 any | newline { TEXT_PASS(); }; 156 *|; 157 158 html_procins := |* 159 EndXmlProcIns @{ EBLK(procins, 2); fgoto main; }; 160 any | newline { TEXT_PASS(); }; 161 *|; 162 163 main := |* 164 XmlDecl >newEle { ELE(xmldecl); }; 165 DocType >newEle { ELE(doctype); }; 166 StartXmlProcIns >newEle { fgoto html_procins; }; 167 StartTag >newEle { ELE(stag); }; 168 EndTag >newEle { ELE(etag); }; 169 EmptyTag >newEle { ELE(emptytag); }; 170 StartComment >newEle { fgoto html_comment; }; 171 StartCdata >newEle { fgoto html_cdata; }; 172 any | newline { TEXT_PASS(); }; 173 *|; 105 include hpricot_common "ext/hpricot_scan/hpricot_common.rl"; 106 174 107 }%% 175 108
