Changeset 146

Show
Ignore:
Timestamp:
06/02/2007 00:48:14 (18 months ago)
Author:
why
Message:
  • ext/hpricot_scan/hpricot_common.rl: more flexibility in the xml procins grammar (ticket #84)
  • lib/hpricot/xchar.rb: allow nils to the escaping and unescaping stuff.
  • lib/hpricot/parse.rb: ignore self-closing endings on tags (such as form) which are containers. Treat them like open parent tags. Reported by Jonathan Nichols on the hpricot list. Thanks and forever.
Location:
trunk
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • trunk/ext/hpricot_scan/hpricot_common.rl

    r126 r146  
    37 37  XmlYesNo = ("yes" | "no") >_aval %xmlsd ;
    38 38  XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ;
    39     XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?>" ;
       39  XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;
    40 40
    41 41  SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ;
     
    45 45  DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ;
    46 46  StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;
    47     EndXmlProcIns = "?>" ;
       47  EndXmlProcIns = "?"? ">" ;
    48 48
    49 49  html_comment := |*
  • trunk/lib/hpricot/parse.rb

    r136 r146  
    54 54          token[0] = :text
    55 55          token[1] = token[3] if token[3]
       56        end
       57
       58        if token[0] == :emptytag and ElementContent[token[1].send(conv)] != :EMPTY and !opts[:xml]
       59          token[0] = :stag
    56 60        end
    57 61
  • trunk/lib/hpricot/xchar.rb

    r145 r146  
    86 86    # XML unescape
    87 87    def uxs(str)
    88         str.gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || ??).chr }.
       88      str.to_s.
       89          gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || ??).chr }.
    89 90          gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
    90 91    end
  • trunk/test/test_parser.rb

    r144 r146  
    115 115    assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
    116 116    assert_equal 17, @boingboing.search("h3[text()$='s']").length
    117        assert_equal 128, @boingboing.search("p[text()]").length
        117    assert_equal 129, @boingboing.search("p[text()]").length
    118 118    assert_equal 211, @boingboing.search("p").length
    119 119  end
     
    155 155    assert_equal 18, @boingboing.search("//script").length
    156 156    divs = @boingboing.search("//script/../div")
    157        assert_equal 2,  divs.length
    158        assert_equal 1,  divs.search('a').length
        157    assert_equal 1,  divs.length
    159 158    imgs = @boingboing.search('//div/p/a/img')
    160 159    assert_equal 15, imgs.length
     
    326 325  end
    327 326
        327  # ticket #84 by jamezilla
        328  def test_screwed_xmlns
        329    doc = Hpricot(<<-edoc)
        330      <?xml:namespace prefix = cwi />
        331      <html><body>HAI</body></html>
        332    edoc
        333    assert_equal "HAI", doc.at("body").inner_text
        334  end
        335
        336  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
        337  def test_self_closed_form
        338    doc = Hpricot(<<-edoc)
        339      <body>
        340      <form action="/loginRegForm" name="regForm" method="POST" />
        341      <input type="button">
        342      </form>
        343      </body>
        344    edoc
        345    assert_equal "button", doc.at("//form/input")['type']
        346  end
        347
    328 348  def test_filters
    329 349    @basic = Hpricot.parse(TestFiles::BASIC)