Changeset 95

Show
Ignore:
Timestamp:
01/27/2007 12:03:07 (22 months ago)
Author:
why
Message:
  • lib/hpricot/parse.rb: no longer store raw strings for text, cdata, comment, and procins nodes; doing so was a redundant waste of cycles.
  • ext/hpricot_scan/hpricot_scan.rl: ditto. Also fixes a small bug in the check for text elements, which was causing two doctypes (a real one and a text one). See #36.
  • test/test_preserved.rb: check that Hpricot preserves all the HTML of our test pages when to_original_html is used.
Location:
trunk
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • trunk/ext/hpricot_scan/hpricot_scan.rl

    r85 r95  
    1818#define ELE(N) \ 
    1919  if (tokend > tokstart || text == 1) { \ 
    20     ele_open = 0; \ 
    21     rb_yield_tokens(sym_##N, tag, attr, tokstart == 0 ? Qnil : rb_str_new(tokstart, tokend-tokstart), taint); \ 
     20    VALUE raw_string = Qnil; \ 
     21    ele_open = 0; text = 0; \ 
     22    if (tokstart != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \ 
     23      raw_string = rb_str_new(tokstart, tokend-tokstart); \ 
     24    } \ 
     25    rb_yield_tokens(sym_##N, tag, attr, raw_string, taint); \ 
    2226  } 
    2327 
     
    5458    } 
    5559 
    56 #define EBLK(N, T) CAT(tag, p - T + 1); ELE(N); text = 0; 
     60#define EBLK(N, T) CAT(tag, p - T + 1); ELE(N); 
    5761 
    5862%%{ 
  • trunk/lib/hpricot/parse.rb

    r94 r95  
    206206  def Text.parse_pcdata(raw_string) 
    207207    result = Text.new(raw_string) 
    208     result.raw_string = raw_string 
    209208    result 
    210209  end 
     
    212211  def Text.parse_cdata_content(raw_string) 
    213212    result = CData.new(raw_string) 
    214     result.raw_string = raw_string 
    215213    result 
    216214  end 
     
    218216  def Text.parse_cdata_section(content) 
    219217    result = CData.new(content) 
    220     result.raw_string = content 
    221218    result 
    222219  end 
     
    256253    _, target, content = *raw_string.match(/\A<\?(\S+)\s+(.+)/m) 
    257254    result = ProcIns.new(target, content) 
    258     result.raw_string = "#{raw_string}?>" 
    259255    result 
    260256  end 
     
    262258  def Comment.parse(content) 
    263259    result = Comment.new(content) 
    264     result.raw_string = "<!--" + content.to_s + "-->" 
    265260    result 
    266261  end 
  • trunk/test/test_parser.rb

    r94 r95  
    4848 
    4949  def scan_basic doc 
     50    assert_kind_of Hpricot::XMLDecl, doc.children.first  
     51    assert_not_equal doc.children.first.to_s, doc.children[1].to_s  
    5052    assert_equal 'link1', doc.at('#link1')['id'] 
    5153    assert_equal 'link1', doc.at("p a")['id'] 
  • trunk/test/test_preserved.rb

    r83 r95  
    33require 'test/unit' 
    44require 'hpricot' 
     5require 'load_files' 
    56 
    67class TestPreserved < Test::Unit::TestCase 
     8  def assert_roundtrip str 
     9    doc = Hpricot(str) 
     10    yield doc if block_given? 
     11    str2 = doc.to_original_html 
     12    [*str].zip([*str2]).each do |s1, s2| 
     13      assert_equal s1, s2 
     14    end 
     15  end 
     16 
    717  def assert_html str1, str2 
    818    doc = Hpricot(str2) 
     
    2939  end 
    3040 
     41  def test_files 
     42    assert_roundtrip TestFiles::BASIC 
     43    assert_roundtrip TestFiles::BOINGBOING 
     44  end 
    3145end