require 'open-uri'
require 'rexml/document'
require 'nokogiri'
require 'builder'
include REXML
# Fetch the Google News RSS feed for +keyword+ and reduce one item's HTML
# description to its plain-text lead-in (the text preceding the trailing "...").
# The result is left in +desc+ (nil when the text contains no "...").
keyword = 'edinburgh'

# URI.escape was removed in Ruby 3.0; encode_www_form_component is the
# supported way to encode a single query-string value.
url = "http://news.google.com/news?pz=1&cf=all&ned=uk&hl=en&q=#{URI.encode_www_form_component(keyword)}&cf=all&output=rss"

# open-uri sends string-keyed options verbatim as request headers, so the key
# must be spelled 'User-Agent' ('UserAgent' would be sent as an unrelated
# header). URI#read (added by open-uri) reads the whole body and closes the
# underlying handle; it also works on Rubies where Kernel#open no longer
# accepts URLs.
buffer = URI.parse(url).read('User-Agent' => 'S-Rscript')

feed = REXML::Document.new(buffer)
nodes = REXML::XPath.match(feed.root, "//item")

# NOTE(review): index 1 selects the SECOND <item> of the feed; confirm whether
# the first item (index 0) was intended.
item = nodes[1]
raise "expected at least two <item> elements in the feed, found #{nodes.size}" if item.nil?
desc = item.text('description').to_s

# The description is an HTML fragment: let Nokogiri normalise it into a full
# document, then re-parse the <body> with REXML so it can be queried and
# edited with the REXML API.
html = Nokogiri::HTML(desc)
doc2 = REXML::Document.new(html.xpath('html/body').to_xml)
raise "item description contained no parsable HTML body" if doc2.root.nil?

# Path (relative to <body>) of the div holding the article summary.
xpath = "table/tr/td[2]/font/div[@class='lh']"
node = REXML::XPath.first(doc2.root, xpath)
raise "summary element not found at #{xpath}" if node.nil?

# Drop the first <a> and first <font> children of the summary div so only the
# remaining content is kept; deleting nil (child absent) is a no-op.
a_tag = REXML::XPath.first(node, 'a')
font_tag = REXML::XPath.first(node, 'font')
node.delete a_tag
node.delete font_tag

# Strip all remaining tags and newlines, then keep everything before the last
# "..." (the match is greedy); yields nil if the text has no "...".
desc = node.to_s.gsub(/<\/?[^>]*>|\n/, "").strip[/.*(?=\.\.\.)/]
#=> "Mike Blair is among three Scotland internationals who return to the Edinburgh starting line-up for the Magners League clash with Scarlets on Friday. "
# Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/h6gN32FfGlo/10693