Changeset 123
- Timestamp:
- 03/23/2007 11:53:54 (20 months ago)
- Location:
- trunk/lib
- Files:
-
- 3 added
- 5 modified
-
hpricot.rb (modified) (1 diff)
-
hpricot/blankslate.rb (added)
-
hpricot/builder.rb (added)
-
hpricot/elements.rb (modified) (2 diffs)
-
hpricot/parse.rb (modified) (3 diffs)
-
hpricot/tag.rb (modified) (3 diffs)
-
hpricot/tags.rb (added)
-
hpricot/traverse.rb (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/lib/hpricot.rb
r48 r123 24 24 require 'hpricot/inspect' 25 25 require 'hpricot/parse' 26 require 'hpricot/builder' -
trunk/lib/hpricot/elements.rb
r120 r123 129 129 # Add to the end of the contents inside each element in this list. 130 130 # Pass in an HTML +str+, which is turned into Hpricot elements. 131 def append(str )132 each { |x| x. inner_html += str}131 def append(str = nil, &blk) 132 each { |x| x.html(x.children + Hpricot.make(str, &blk)) } 133 133 end 134 134 135 135 # Add to the start of the contents inside each element in this list. 136 136 # Pass in an HTML +str+, which is turned into Hpricot elements. 137 def prepend(str )138 each { |x| x. inner_html = str + x.inner_html}137 def prepend(str = nil, &blk) 138 each { |x| x.html(Hpricot.make(str, &blk) + x.children) } 139 139 end 140 140 141 141 # Add some HTML just previous to each element in this list. 142 142 # Pass in an HTML +str+, which is turned into Hpricot elements. 143 def before(str )144 each { |x| x.parent.insert_before Hpricot.make(str ), x }143 def before(str = nil, &blk) 144 each { |x| x.parent.insert_before Hpricot.make(str, &blk), x } 145 145 end 146 146 147 147 # Just after each element in this list, add some HTML. 148 148 # Pass in an HTML +str+, which is turned into Hpricot elements. 149 def after(str )150 each { |x| x.parent.insert_after Hpricot.make(str ), x }149 def after(str = nil, &blk) 150 each { |x| x.parent.insert_after Hpricot.make(str, &blk), x } 151 151 end 152 152 … … 159 159 # 160 160 # This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest. 161 def wrap(str )161 def wrap(str = nil, &blk) 162 162 each do |x| 163 wrap = Hpricot.make(str )163 wrap = Hpricot.make(str, &blk) 164 164 nest = wrap.detect { |w| w.respond_to? :children } 165 165 unless nest -
trunk/lib/hpricot/parse.rb
r95 r123 1 1 require 'hpricot/htmlinfo' 2 2 3 def Hpricot(input , opts = {})4 Hpricot.parse(input, opts )3 def Hpricot(input = nil, opts = {}, &blk) 4 Hpricot.parse(input, opts, &blk) 5 5 end 6 6 … … 12 12 # Hpricot.parse parses <i>input</i> and return a document tree. 13 13 # represented by Hpricot::Doc. 14 def Hpricot.parse(input , opts = {})15 Doc.new(make(input, opts ))14 def Hpricot.parse(input = nil, opts = {}, &blk) 15 Doc.new(make(input, opts, &blk)) 16 16 end 17 17 … … 24 24 # :stopdoc: 25 25 26 def Hpricot.make(input , opts = {})26 def Hpricot.make(input = nil, opts = {}, &blk) 27 27 opts = {:fixup_tags => false}.merge(opts) 28 29 case opts[:encoding] 30 when nil 31 when 'utf-8' 32 unless defined? Encoding::Character::UTF8 33 raise EncodingError, "The ruby-character-encodings library could not be found for utf-8 mode." 34 end 35 else 36 raise EncodingError, "No encoding option `#{opts[:encoding]}' is available." 37 end 38 39 if opts[:xhtml_strict] 40 opts[:fixup_tags] = true 41 end 42 43 stack = [[nil, nil, [], [], [], []]] 44 Hpricot.scan(input) do |token| 45 if stack.last[5] == :CDATA and ![:procins, :comment, :cdata].include?(token[0]) and 46 !(token[0] == :etag and token[1].downcase == stack.last[0]) 47 token[0] = :text 48 token[1] = token[3] if token[3] 49 end 50 51 case token[0] 52 when :stag 53 case opts[:encoding] when 'utf-8' 54 token.map! { |str| u(str) if str.is_a? String } 28 unless input or blk 29 raise ArgumentError, "An Hpricot document must be built from an input source (a String) or a block." 30 end 31 32 fragment = 33 if input 34 case opts[:encoding] 35 when nil 36 when 'utf-8' 37 unless defined? Encoding::Character::UTF8 38 raise EncodingError, "The ruby-character-encodings library could not be found for utf-8 mode." 55 39 end 56 57 stagname = token[0] = token[1].downcase 58 if ElementContent[stagname] == :EMPTY and !opts[:xml] 59 token[0] = :emptytag 60 stack.last[2] << token 61 else 62 unless opts[:xml] 63 if opts[:fixup_tags] 64 # obey the tag rules set up by the current element 65 if ElementContent.has_key? stagname 66 trans = nil 67 (stack.length-1).downto(0) do |i| 68 untags = stack[i][5] 69 break unless untags.include? stagname 70 # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}" 71 trans = i 40 else 41 raise EncodingError, "No encoding option `#{opts[:encoding]}' is available." 42 end 43 44 if opts[:xhtml_strict] 45 opts[:fixup_tags] = true 46 end 47 48 stack = [[nil, nil, [], [], [], []]] 49 Hpricot.scan(input) do |token| 50 if stack.last[5] == :CDATA and ![:procins, :comment, :cdata].include?(token[0]) and 51 !(token[0] == :etag and token[1].downcase == stack.last[0]) 52 token[0] = :text 53 token[1] = token[3] if token[3] 54 end 55 56 case token[0] 57 when :stag 58 case opts[:encoding] when 'utf-8' 59 token.map! { |str| u(str) if str.is_a? String } 60 end 61 62 stagname = token[0] = token[1].downcase 63 if ElementContent[stagname] == :EMPTY and !opts[:xml] 64 token[0] = :emptytag 65 stack.last[2] << token 66 else 67 unless opts[:xml] 68 if opts[:fixup_tags] 69 # obey the tag rules set up by the current element 70 if ElementContent.has_key? stagname 71 trans = nil 72 (stack.length-1).downto(0) do |i| 73 untags = stack[i][5] 74 break unless untags.include? stagname 75 # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}" 76 trans = i 77 end 78 if trans.to_i > 1 79 eles = stack.slice!(trans..-1) 80 stack.last[2] += eles 81 # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}" 82 end 83 elsif opts[:xhtml_strict] 84 token[2] = {'class' => stagname} 85 stagname = token[0] = "div" 72 86 end 73 if trans.to_i > 1 74 eles = stack.slice!(trans..-1) 75 stack.last[2] += eles 76 # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}" 87 end 88 89 # setup tag rules for inside this element 90 if ElementContent[stagname] == :CDATA 91 uncontainable_tags = :CDATA 92 elsif opts[:fixup_tags] 93 possible_tags = ElementContent[stagname] 94 excluded_tags, included_tags = stack.last[3..4] 95 if possible_tags 96 excluded_tags = excluded_tags | (ElementExclusions[stagname] || []) 97 included_tags = included_tags | (ElementInclusions[stagname] || []) 98 containable_tags = (possible_tags | included_tags) - excluded_tags 99 uncontainable_tags = ElementContent.keys - containable_tags 100 else 101 # If the tagname is unknown, it is assumed that any element 102 # except excluded can be contained. 103 uncontainable_tags = excluded_tags 77 104 end 78 elsif opts[:xhtml_strict]79 token[2] = {'class' => stagname}80 stagname = token[0] = "div"81 105 end 82 106 end 83 84 # setup tag rules for inside this element 85 if ElementContent[stagname] == :CDATA 86 uncontainable_tags = :CDATA 87 elsif opts[:fixup_tags] 88 possible_tags = ElementContent[stagname] 89 excluded_tags, included_tags = stack.last[3..4] 90 if possible_tags 91 excluded_tags = excluded_tags | (ElementExclusions[stagname] || []) 92 included_tags = included_tags | (ElementInclusions[stagname] || []) 93 containable_tags = (possible_tags | included_tags) - excluded_tags 94 uncontainable_tags = ElementContent.keys - containable_tags 95 else 96 # If the tagname is unknown, it is assumed that any element 97 # except excluded can be contained. 98 uncontainable_tags = excluded_tags 99 end 107 stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags] 108 end 109 when :etag 110 etagname = token[0] = token[1].downcase 111 if opts[:xhtml_strict] and not ElementContent.has_key? etagname 112 etagname = token[0] = "div" 113 end 114 matched_elem = nil 115 (stack.length-1).downto(0) do |i| 116 stagname, = stack[i] 117 if stagname == etagname 118 matched_elem = stack[i] 119 stack[i][1] += token 120 eles = stack.slice!((i+1)..-1) 121 stack.last[2] += eles 122 break 100 123 end 101 124 end 102 stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags] 103 end 104 when :etag 105 etagname = token[0] = token[1].downcase 106 if opts[:xhtml_strict] and not ElementContent.has_key? etagname 107 etagname = token[0] = "div" 108 end 109 matched_elem = nil 110 (stack.length-1).downto(0) do |i| 111 stagname, = stack[i] 112 if stagname == etagname 113 matched_elem = stack[i] 114 stack[i][1] += token 115 eles = stack.slice!((i+1)..-1) 116 stack.last[2] += eles 117 break 118 end 119 end 120 unless matched_elem 121 stack.last[2] << [:bogus_etag, token.first, token.last] 122 else 123 ele = stack.pop 124 stack.last[2] << ele 125 end 126 when :text 127 l = stack.last[2].last 128 if l and l[0] == :text 129 l[1] += token[1] 125 unless matched_elem 126 stack.last[2] << [:bogus_etag, token.first, token.last] 127 else 128 ele = stack.pop 129 stack.last[2] << ele 130 end 131 when :text 132 l = stack.last[2].last 133 if l and l[0] == :text 134 l[1] += token[1] 135 else 136 stack.last[2] << token 137 end 130 138 else 131 139 stack.last[2] << token 132 140 end 133 e lse134 stack.last[2] << token 135 end136 end137 138 while 1 < stack.length139 ele = stack.pop 140 st ack.last[2] << ele141 end142 143 structure_list = stack[0][2]144 structure_list.map {|s| build_node(s, opts) }141 end 142 143 while 1 < stack.length 144 ele = stack.pop 145 stack.last[2] << ele 146 end 147 148 structure_list = stack[0][2] 149 structure_list.map {|s| build_node(s, opts) } 150 elsif blk 151 Hpricot.build(&blk).children 152 end 145 153 end 146 154 -
trunk/lib/hpricot/tag.rb
r93 r123 4 4 class Doc 5 5 attr_accessor :children 6 def initialize(children )6 def initialize(children = []) 7 7 @children = children ? children.each { |c| c.parent = self } : [] 8 8 end … … 85 85 class STag < BaseEle 86 86 def initialize(name, attributes=nil) 87 @name = name. downcase87 @name = name.to_s.downcase 88 88 @attributes = {} 89 89 if attributes 90 @attributes = attributes.inject({}) { |hsh,(k,v)| hsh[k. downcase] = v; hsh }90 @attributes = attributes.inject({}) { |hsh,(k,v)| hsh[k.to_s.downcase] = v; hsh } 91 91 end 92 92 end … … 112 112 class ETag < BaseEle 113 113 def initialize(qualified_name) 114 @name = qualified_name 114 @name = qualified_name.to_s 115 115 end 116 116 alterable :name -
trunk/lib/hpricot/traverse.rb
r122 r123 90 90 91 91 # Adds elements immediately after this element, contained in the +html+ string. 92 def after(html )93 parent.insert_after(Hpricot.make(html ), self)92 def after(html = nil, &blk) 93 parent.insert_after(Hpricot.make(html, &blk), self) 94 94 end 95 95 96 96 # Adds elements immediately before this element, contained in the +html+ string. 97 def before(html )98 parent.insert_before(Hpricot.make(html ), self)97 def before(html = nil, &blk) 98 parent.insert_before(Hpricot.make(html, &blk), self) 99 99 end 100 100 … … 102 102 # Replace this element and its contents with the nodes contained 103 103 # in the +html+ string. 104 def swap(html )104 def swap(html = nil, &blk) 105 105 parent.altered! 106 parent.replace_child(self, Hpricot.make(html ))106 parent.replace_child(self, Hpricot.make(html, &blk)) 107 107 end 108 108 … … 133 133 134 134 # Builds an HTML string from the contents of this node. 135 def inner_html 136 if respond_to? :children 137 children.map { |x| x.output("") }.join 138 end 139 end 135 def html(inner = nil, &blk) 136 if inner or blk 137 altered! 138 case inner 139 when Array 140 self.children = inner 141 else 142 self.children = Hpricot.make(inner, &blk) 143 end 144 reparent self.children 145 else 146 if respond_to? :children 147 children.map { |x| x.output("") }.join 148 end 149 end 150 end 151 alias_method :inner_html, :html 140 152 alias_method :innerHTML, :inner_html 141 153 … … 143 155 # the HTML contained in string +inner+. 144 156 def inner_html=(inner) 145 altered! 146 case inner 147 when String, IO 148 self.children = Hpricot.parse(inner).children 149 when Array 150 self.children = inner 151 when nil 152 self.children = [] 153 end 154 reparent self.children 157 html(inner || []) 155 158 end 156 159 alias_method :innerHTML=, :inner_html=
