# Returns the IRC-formatted text of the first "substantial" element of an
# HTML document, parsing the document with Hpricot.
#
# A candidate text is accepted when it is non-empty and contains at least
# +min_spaces+ space characters. Candidates are tried in three phases, each
# in document order:
#
# 1. <p> elements that come after the first heading (h1, h2, ...);
# 2. every <p> element;
# 3. elements whose path matches AFTER_PAR_PATH and whose class or id matches
#    AFTER_PAR_CLASS; if there are none, elements whose path matches
#    AFTER_PAR_EX (same class/id test) are used instead.
#
# When no candidate is accepted the threshold is halved and all phases are
# tried again, until the threshold reaches 0.
#
# xml_org:: the HTML source, handed to Hpricot()
# opts::    options hash:
#           :strip::      text to remove from each candidate before it is
#                         evaluated. A String is escaped and matched
#                         literally at the start of a line; any other
#                         non-nil value is passed unchanged to String#sub!
#           :min_spaces:: initial threshold (default 8; negative values are
#                         treated as 0)
#
# Returns the first accepted candidate. If nothing is ever accepted, the
# last candidate examined is returned; this can only happen once the
# threshold is 0, so that candidate is an empty string.
def Utils.ircify_first_html_par_wh(xml_org, opts={})
  doc = Hpricot(xml_org)

  # Styles and scripts are dropped before looking for text.
  (doc/"style|script").remove

  debug doc

  strip = opts[:strip]
  strip = /^#{Regexp.escape(strip)}/ if strip.kind_of?(String)

  min_spaces = opts[:min_spaces] || 8
  min_spaces = 0 if min_spaces < 0

  # Most recent candidate examined by any phase; used as the final fallback.
  txt = String.new

  # Scans +elements+ in order and returns the first candidate text that meets
  # the current threshold, or nil when none does. The result only ever comes
  # from +elements+ itself, so an empty collection can never "accept" a
  # leftover candidate from an earlier phase or an earlier pass.
  first_good = lambda { |elements, label|
    good = nil
    elements.each { |el|
      debug el
      txt = el.to_html.ircify_html
      txt.sub!(strip, '') if strip
      debug "(#{label}) #{txt.inspect} has #{txt.count(" ")} spaces"
      next if txt.empty? or txt.count(" ") < min_spaces
      good = txt
      break
    }
    good
  }

  # The element collections are computed lazily, once, and reused by every
  # pass of the loop below.
  pre_h = pars = by_span = nil

  loop do
    debug "Minimum number of spaces: #{min_spaces}"

    # Phase 1: <p> elements that follow a heading.
    if pre_h.nil?
      pre_h = Hpricot::Elements[]
      found_h = false
      doc.search("*") { |e|
        next if e.bogusetag?
        case e.pathname
        when /^h\d/
          found_h = true
        when 'p'
          pre_h << e if found_h
        end
      }
      debug "Hx: found: #{pre_h.pretty_inspect}"
    end

    found = first_good.call(pre_h, "Hx attempt")
    return found if found

    # Phase 2: any <p> element.
    if pars.nil?
      pars = doc/"p"
      debug "par: found: #{pars.pretty_inspect}"
    end

    found = first_good.call(pars, "par attempt")
    return found if found

    # Phase 3: non-paragraph elements selected through the AFTER_PAR_*
    # constants (defined outside this method).
    if by_span.nil?
      by_span = Hpricot::Elements[]
      extra = Hpricot::Elements[]
      doc.search("*") { |el|
        next if el.bogusetag?
        case el.pathname
        when AFTER_PAR_PATH
          by_span.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
        when AFTER_PAR_EX
          extra.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
        end
      }
      # The AFTER_PAR_EX matches are only a fallback for when the primary
      # selection found nothing.
      if by_span.empty? and not extra.empty?
        by_span.concat extra
      end
      debug "other \#1: found: #{by_span.pretty_inspect}"
    end

    found = first_good.call(by_span, "other attempt \#1")
    return found if found

    debug "Last candidate #{txt.inspect} has #{txt.count(" ")} spaces"
    # Every candidate was rejected at this threshold. Once the threshold has
    # dropped to 0 this test always succeeds, which is what ends the loop.
    return txt unless txt.count(" ") < min_spaces
    min_spaces /= 2
  end
end