Ticket #158: test4.rb

File test4.rb, 0.9 kB (added by roscommon, 3 months ago)

script producing the error

Line 
1
#!/usr/local/bin/ruby -w

# Reproduction script attached to ticket #158.
#
# Fetches a fixed list of web pages one after another, parses each with
# Hpricot, and prints the cleaned-up text of the first <title> and first
# <h1> element. URLs that were handled without raising are collected and
# dumped at the end, bracketed by start/end timestamps.

require 'rubygems'
require 'threadpool' # NOTE(review): loaded but never referenced in this script
require 'pp'
require 'hpricot'
require 'open-uri'

# Interpreter debug mode: raised exceptions are reported even when rescued.
$DEBUG = true

# Returns the tidied inner HTML of the first element matching +selector+
# in +doc+, or the literal "(none)" when no such element exists.
def element_text(doc, selector)
  node = doc.at(selector)
  return "(none)" if node.nil?

  markup = node.inner_html
  # If the fragment contains a <body>...</body> pair, keep only its contents.
  markup = markup.sub(%r{<body.*?>(.*?)</body>}mi, '\1')
  # Replace every remaining tag with a single space.
  markup = markup.gsub(/<.*?>/m, ' ')
  # Collapse a doubled newline-plus-whitespace run into one blank line.
  markup.gsub(%r{(\n\s*){2}}, "\n\n").strip
end

processed = []

pages = %w( http://www.ibm.com http://www.axisforex.com.au http://www.abc.net.au http://www.cnn.com http://www.jobx.com http://www.jobx.com.au)
print "#{Time.now}\n"

pages.each do |url|
  print "started::url:[#{url}]\n"
  begin
    doc = open(url) { |stream| Hpricot(stream) }
    title = element_text(doc, "title")
    h1 = element_text(doc, "h1")
    print "job #{url} title:[#{title}]\th1:[#{h1}].\n"
    # Only reached when fetching, parsing and printing all succeeded.
    processed << url
  rescue
    # Best-effort: report the failing URL and carry on with the next one.
    print "error #{url}\n"
  end
end

pp processed
print "#{Time.now}\n"