Error
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.
  • JHTMLicon not supported. File not found.

def

Compete.com Webstats Scrape Groovy
Monday, 16 January 2012 08:00
// description of your code here
This is a script for collecting webstats data from compete.com. The script takes as input the list of domains that you want to analyze and outputs the compete.com webstats data for each of them.

import com.gargoylesoftware.htmlunit.WebClient
import com.gargoylesoftware.htmlunit.BrowserVersion

// Downloads the compete.com CSV export of every domain named in the input
// file and appends the rows, prefixed with the quoted domain name, to one
// combined CSV file.
def domains = new File("/root/Desktop/Morningstar/AlexaTop3000.txt").readLines()
def csvOut = new File("/root/Desktop/Morningstar/CompeteStats3000.csv")
csvOut.delete() // start every run from an empty output file
def browser = new WebClient(BrowserVersion.FIREFOX_3_6)

for (rawName in domains) {
    def domainName = rawName.trim()
    println domainName
    def exportPage = browser.getPage("http://siteanalytics.compete.com/export_csv/${domainName}/")
    def rows = exportPage.getContent().split("\n")

    rows.eachWithIndex { row, idx ->
        // Rows 0..3 are skipped (presumably header lines of the export --
        // confirm against the actual CSV format).
        if (idx >= 4) {
            csvOut.append("\"${domainName}\",${row}\n")
        }
    }
    sleep(400) // pause 400 ms between requests
}

Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/MsOC6a5BkFo/14437

 
Clothes recommender
Friday, 25 November 2011 02:28
Prerequisites

Basic Ruby/Rails knowledge, including an installed version of Ruby 1.9.2, Rubygems, Bundler, and Rails 3.
Basic Git knowledge, including an installed version of Git.
Create a Heroku account http://www.heroku.com/
All the commands work on linux

1# Install the Heroku client:


$ gem install heroku


2# Write Your App

You may be starting from an existing app from http://dl.dropbox.com/u/261809/clothes_recommender.tar.gz. If not you can create your own


$ rails new myapp
$ cd myapp


3# Edit your Gemfile and change the gem declarations to:

# Database driver per environment: 'pg' is only loaded in the :production
# group, 'sqlite3' only in :development and :test.
gem 'pg', :group => :production
gem 'sqlite3', :groups => [:development, :test]

# Not restricted to any group.
gem 'httparty'



4# Install Gems


$ bundle install --without production


5# Store Your App in Git


$ git init
$ git add .
$ git commit -m "init"


6# Deploy to Heroku/Cedar

Create the app on the Cedar stack:

$ heroku create --stack cedar


7# Creating the recommender:

* Create the models

create file app/models/google_weather.rb


# Small HTTParty client for the weather XML feed served under
# www.google.com/ig/api.
#
# Every reader goes through #weather, which fetches the feed on first use and
# memoises it, so the readers work whether or not #weather was called first.
class GoogleWeather
  include HTTParty
  base_uri "www.google.com"

  attr_reader :long, :lat

  # @param options [Hash] :lat and :long, anything responding to #to_f.
  #   Both are stored multiplied by 1_000_000 because the request sends them
  #   as integers.
  def initialize(options)
    @long = options[:long].to_f * 1000000
    @lat = options[:lat].to_f * 1000000
  end

  # Fetches the feed once and returns the parsed response.
  def weather
    @weather ||= self.class.get("/ig/api", :query => {:weather => ",,,#{@lat.to_i},#{@long.to_i}"}, :format => :xml)
  end

  def current_condition
    @current_condition ||= current_conditions["condition"]["data"]
  end

  def current_temp
    @current_temp ||= current_conditions["temp_f"]["data"]
  end

  def current_icon
    @current_icon ||= "http://www.google.com/#{current_conditions["icon"]["data"]}"
  end

  def tomorrow_condition
    @tomorrow_condition ||= tomorrow_forecast["condition"]["data"]
  end

  def tomorrow_high
    @tomorrow_high ||= tomorrow_forecast["high"]["data"]
  end

  def tomorrow_low
    @tomorrow_low ||= tomorrow_forecast["low"]["data"]
  end

  # Integer mean of tomorrow's low and high (integer division).
  def tomorrow_avgtemp
    @tomorrow_avgtemp ||= (tomorrow_low.to_i + tomorrow_high.to_i) / 2
  end

  private

  # The 'current_conditions' node of the reply.
  def current_conditions
    weather['xml_api_reply']['weather']['current_conditions']
  end

  # Second entry of 'forecast_conditions'; the public readers treat it as
  # tomorrow's forecast (presumably entry 0 is today -- confirm with the feed).
  def tomorrow_forecast
    weather['xml_api_reply']['weather']['forecast_conditions'][1]
  end
end



create file app/models/clothes_recommender.rb


# Turns the weather reported by GoogleWeather into a clothing recommendation.
class ClothesRecommender
  # Advice keyed by lower-cased weather condition. A condition listed here
  # always wins over the temperature-based advice.
  CONDITION_ADVICE = {
    "overcast"       => "you to carry an umberellea or rain coat",
    "rain"           => "not to forget your umberellea/rain coat",
    "mostly sunny"   => "take your umberellea/rain coat",
    "partly sunny"   => "Place holder text",
    "mostly cloudy"  => "Place holder text",
    "partly cloudy"  => "Place holder text",
    "clear"          => "Place holder text",
    "chance of rain" => "Place holder text",
    "cloudy"         => "Place holder text"
  }.freeze

  # Advice by temperature; the first range containing the value wins, so the
  # overlapping last entry effectively covers 0..19.
  TEMPERATURE_ADVICE = [
    [81..100, "you to wear light cotton clothing and bottle of water"],
    [51..80,  "you to wear pants with a top and a lighter jacket"],
    [41..50,  "you to take lighter jacket with you"],
    [20..40,  "Place holder text"],
    [0..30,   "Place holder text"]
  ].freeze

  # Builds the recommendation for a position.
  #
  # @return [Hash] :display_text (sentence for the user) and :icon (icon URL)
  def self.get_recommendation(lat, long)
    gw = GoogleWeather.new(:lat => lat, :long => long)
    gw.weather
    {
      :display_text => "Today's weather is #{gw.current_condition} at #{gw.current_temp}F, we recommend #{lookup(gw.current_condition, gw.current_temp)}",
      :icon => gw.current_icon
    }
  end

  # Picks the advice text for a condition/temperature pair.
  #
  # @param condition [#to_s, nil] weather condition, matched case-insensitively;
  #   nil is treated as an unknown condition instead of raising
  # @param temp [#to_i] temperature, converted with #to_i
  # @return [String, nil] nil when neither the condition nor the temperature
  #   (outside 0..100) is covered
  def self.lookup(condition, temp)
    by_condition = CONDITION_ADVICE[condition.to_s.downcase]
    return by_condition if by_condition

    degrees = temp.to_i
    entry = TEMPERATURE_ADVICE.find { |range, _advice| range.include?(degrees) }
    entry && entry.last
  end
end


* Create controller

create file app/controllers/recommender_controller.rb


# Serves the clothing recommendation for a latitude/longitude pair.
class RecommenderController < ApplicationController

  # Computes today's recommendation and the URL of the details page for the
  # same coordinates.
  def index
    @recommendations = ClothesRecommender.get_recommendation(params[:latitude].to_f, params[:longitude].to_f)
    # The query string must separate parameters with "&"; with a "," the
    # details action receives everything as :latitude and no :longitude.
    # Hash#to_query also URL-escapes the user-supplied values.
    coordinates = { :latitude => params[:latitude], :longitude => params[:longitude] }
    @recommendations[:details_url] = "http://#{request.host_with_port}/recommender/details?#{coordinates.to_query}"
  end

  # Prepares the sentences for today and tomorrow shown by the details view.
  def details
    options = { :lat => params[:latitude], :long => params[:longitude] }
    @gw = GoogleWeather.new(options)
    @gw.weather
    @today = "Today's weather is #{@gw.current_condition} at #{@gw.current_temp}F, we recommend #{ClothesRecommender.lookup(@gw.current_condition,@gw.current_temp)}"
    @tomorrow = "Tomorrow's forecast #{@gw.tomorrow_condition} with High at #{@gw.tomorrow_high}F and Low at #{@gw.tomorrow_low}F, we recommend #{ClothesRecommender.lookup(@gw.tomorrow_condition,@gw.tomorrow_avgtemp)}"
  end

end


* Create the views







def | tag






Today:
<%=@today%>



Tomorrow:
<%=@tomorrow%>







* Create xml builder

# Builder template: renders @recommendations as a <dev_expert> document.
# The same icon URL is emitted for the hdpi, mdpi and ldpi elements.
rec = @recommendations
icon_url = rec[:icon].to_s

xml.instruct!
xml.dev_expert do
  xml.display_text rec[:display_text].to_s
  xml.icon_hdpi_url icon_url
  xml.icon_mdpi_url icon_url
  xml.icon_ldpi_url icon_url
  xml.details_view_url rec[:details_url].to_s
end



8# Deploy your code:

$ git push heroku master

When prompted for your Heroku user name and password, please enter them.

Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/JjF1ADUg3gg/13965

 
Nano-search engine
Tuesday, 26 July 2011 14:09
QueryMatcher works like a regexp, but takes a Google-style list of words
and -words (excluded words) as the request.

Example:
mot1 mot2 "two word" -mot3 -"g m" /f\d+/
searches for:
mot1 AND mot2 AND "two word" AND NOT mot3 AND NOT "g m" AND regexp(f[0-9]+)


# Google-style line matcher.
#
# A query is a whitespace-separated list of terms:
#   word         the line must contain +word+
#   "two words"  the line must contain the exact phrase
#   /regexp/     the line must match the regular expression
#   -word        the line must NOT contain +word+
#   -"two words" the line must NOT contain the phrase
# All positive terms are ANDed; matching is case-sensitive. A query without
# any positive term matches nothing.
class QueryMatcher
  # Parses the query.
  #
  # @param str0 [#to_s] the query string
  # @raise [RegexpError] when a /regexp/ term is not a valid regular expression
  def initialize(str0)
    # The exclusion patterns require whitespace in front of "-"; the padding
    # lets an exclusion that is the very first token be recognised as well.
    rest = " #{str0}"

    # Each token is captured and removed in the same pass, so removing one
    # token can never damage another one that merely shares its prefix, and
    # hyphens inside an excluded term are kept.
    excluded = []
    rest = rest.gsub(/\s-"([^"]*)"/) { excluded << Regexp.last_match(1); '' }
    rest = rest.gsub(/\s-(\S*)/) { excluded << Regexp.last_match(1); '' }
    # A bare "-" or -"" would give an empty pattern that rejects every line.
    excluded.reject!(&:empty?)

    phrases = []
    rest = rest.gsub(/"([^"]*)"/) { phrases << Regexp.last_match(1); '' }
    regexps = []
    rest = rest.gsub(%r{/[^/]+/}) { |literal| regexps << literal; '' }
    # Argument-less split ignores leading whitespace: no empty terms.
    words = rest.split

    terms = phrases + regexps + words
    patterns = phrases.map { |phrase| Regexp.new(Regexp.escape(phrase)) } +
               regexps.map { |literal| Regexp.new(literal[1..-2]) } +
               words.map { |word| Regexp.new(Regexp.escape(word)) }

    @squery = "(" + terms.map { |term| Regexp.escape(term) }.join(" && ") + ")"
    @rquery = patterns.empty? ? nil : patterns

    @sequery = "(" + excluded.map { |term| Regexp.escape(term) }.join(")|(") + ")"
    @equery = excluded.empty? ? nil : Regexp.new(@sequery)

    @echo = @equery ? "#{@squery} AND NOT #{@sequery}" : @squery
  end

  # @param str [String, nil] the text to test
  # @return [Boolean] true when +str+ is not blank, matches every positive
  #   term and none of the excluded ones; false for nil/blank input and for
  #   text that cannot be matched because of its encoding
  def match(str)
    return false unless @rquery
    return false if str.nil? || str.strip.empty?
    return false unless @rquery.all? { |re| str =~ re }
    return true unless @equery

    str !~ @equery
  rescue ArgumentError, EncodingError
    # Invalid byte sequences / incompatible encodings: treat as "no match".
    false
  end

  # Human-readable form of the parsed query,
  # e.g. "(create && dir) AND NOT (Declare)".
  def to_s
    @echo
  end

  # @return [Array<String>] the matching lines of +filename+, without their
  #   line terminator
  def search_file(filename)
    File.foreach(filename).select { |line| match(line) }.map(&:chomp)
  end

  # Searches every non-directory entry matching "dir/glob".
  #
  # @return [Array<Array>] one [path, matching_lines] pair per file that has
  #   at least one matching line
  def search_dir(dir, glob)
    Dir.glob("#{dir}/#{glob}").each_with_object([]) do |entry, res|
      next if File.directory?(entry)

      hits = search_file(entry)
      res << [entry, hits] unless hits.empty?
    end
  end
end


Test :

# Demo: list the lines of the Ruby 1.9.1 standard library sources that
# contain "create" and "dir" but not "Declare".
qm = QueryMatcher.new("create dir -Declare")
l = qm.search_dir("ruby19/lib/ruby/1.9.1", "*.rb")

puts "Search '#{qm}' in ruby19/lib/ruby/1.9.1 ::"

# One header per file, followed by its matching lines.
l.each do |f, ls|
  puts format(" %-15s :\n %s", File.basename(f), ls.join("\n "))
end



This gives:

Search '(create && dir) AND NOT (Declare)' in ruby19/lib/ruby/1.9.1 ::
fileutils.rb :
# FileUtils.mkdir 'notexist', :noop => true # Does not really create.
# If +new+ already exists and it is a directory, creates a link +new/old+.
# exists and it is a directory, creates a symbolic link +new/old+. If +new+
rake.rb :
# Tasks are not usually created directly using the new method, but rather
rubygems.rb :
# subdirectories. If we can't create a directory due to a permission
tmpdir.rb :
# Dir.mktmpdir creates a temporary directory.
# The directory is created with 0700 permission.
# The directory is created under Dir.tmpdir or
path = Tmpname.create(prefix_suffix || "d", *rest) {|n| mkdir(n, 0700)}

Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/UEuYO-upgkQ/13411

 
Concurrent map
Friday, 17 June 2011 09:57
A simplified Map/Reduce that runs in a single process with multiple threads.
It seems useful when the generator and the map processing are I/O-heavy.


require 'thread'
require 'timeout'



######################## Parallel Map engine ######################################

# Minimal single-process, multi-threaded map/reduce.
#
# A generator proc fills a work queue, :nbThread worker threads run the mapper
# proc on every queued item, and #get_result hands everything the mappers
# pushed to the result queue to the reducer proc.
class ParallelMap
  # Starts the workers and the generator thread immediately.
  #
  # @param options [Hash]
  #   :generator (required) proc |queue|      -- pushes the work items
  #   :mapper    (required) proc |out, item|  -- pushes results to +out+
  #   :reducer   (optional) proc |results|    -- defaults to identity
  #   :nbThread  (optional) number of worker threads, defaults to 4
  # @raise [RuntimeError] when :generator or :mapper is missing
  def initialize(options)
    @nbThread = options[:nbThread] || 4
    @generator = options[:generator] || raise("missing generator proc")
    @mapper = options[:mapper] || raise("missing mapper proc")
    @reducer = options[:reducer] || proc { |r| r }
    @query = Queue.new
    @result = Queue.new
    @lthread = (1..@nbThread).map { |no| Thread.new { mapping(@query, @result, no) } }
    @th = Thread.new { generating }
  end

  # Body of the generator thread: produce the work, then stop the workers.
  def generating
    @generator.call(@query)
  ensure
    # Always queue one :eend sentinel per worker and wait for them, even when
    # the generator raised; otherwise the workers would stay blocked on the
    # queue forever. The generator's exception still propagates afterwards.
    @nbThread.times { @query << :eend }
    @lthread.each(&:join)
  end

  # Waits until all work is done and returns the reducer's value.
  # An exception raised by the generator is re-raised here by Thread#join.
  def get_result
    @th.join
    res = []
    res << @result.pop until @result.empty?
    @reducer.call(res)
  end

  # Body of worker thread number +no+: map items until the :eend sentinel.
  # A mapper failure does not stop the worker; it is reported as a
  # "<no> ERROR <item.inspect> <message>" string on the result queue.
  def mapping(queue, result, no)
    while (mess = queue.shift) != :eend
      begin
        @mapper.call(result, mess)
      rescue => e
        result << [no.to_s, "ERROR", mess.inspect, e.to_s].join(" ")
      end
    end
  end
end


And here is a use case, a recursive file grep:


############################# call the block for each matching file name
# Walks +root+ recursively and calls +blk+ with the path of every
# non-directory entry whose lower-cased base name matches the File.fnmatch
# pattern +filter+.
def rfind(root,filter,&blk)
  $nbfile = 0 # reset on every call (recursive ones included); not read in this snippet
  Dir.glob("#{root}/*").each do |path|
    name = File.basename(path)
    next if name =~ /^\.\.?$/

    if File.directory?(path)
      rfind(path, filter, &blk)
    elsif File.fnmatch(filter, name.downcase)
      blk.call(path)
    end
  end
end

####################### Map : grep on one file #####################

# Appends to +out+ one "file:NNNNNNNNN:line" entry for every line of +file+
# matched by +matcher+. NNNNNNNNN is the zero-based line index, zero-padded to
# nine digits; the line keeps its terminator.
def selectLine(out,matcher,file)
  File.open(file, "r") do |io|
    io.readlines.each_with_index do |text, index|
      next unless matcher =~ text

      out << format("%s:%09d:%s", file, index, text)
    end
  end
end

####################### Reduce : sort results by file name / line number

# Sorts the "file:NNNNNNNNN:line" entries and strips the zero padding of the
# line number (the padding only exists so that the string sort orders by line).
#
# - A line number of 0 is kept as "0" instead of becoming an empty field.
# - Entries without any ":" (for example the "<no> ERROR ..." strings pushed
#   by a failing worker) are passed through unchanged instead of raising.
#
# @param l [Array<String>] the collected entries
# @return [Array<String>] sorted, de-padded entries
def reduce(l)
  l.sort.map do |entry|
    file, rest = entry.split(":", 2)
    next entry if rest.nil?

    # Drop leading zeros but always leave at least one digit.
    "#{file}:#{rest.sub(/\A0+(?=\d)/, '')}"
  end
end

####################################################################################
# M A I N #
####################################################################################

# Command line: pgrep <regexp> <path> <file-filter>
#   regexp      regular expression every reported line must match
#   path        directory that is searched recursively
#   file-filter file extension, written either bare ("rb") or as "*.rb"
raise("Usage : > pgrep regexp path 'file-filter'") if ARGV.length != 3

query = /#{ARGV[0]}/
path = ARGV[1]
ext = ARGV[2]
# The usage text promises a file filter, so tolerate a leading "*." as well
# as a bare extension: "*.rb" used to become "*.*.rb" and match almost nothing.
filter = "*." + ext.downcase.sub(/\A\*\./, '')

starting = Time.now.to_f
result = ParallelMap.new(
  :nbThread => 5,
  :generator => proc { |res| rfind(path, filter) { |file| res << file } },
  :mapper => proc { |out, in_file_name| selectLine(out, query, in_file_name) },
  :reducer => proc { |rr| reduce(rr) }
).get_result()

ending = Time.now.to_f
result.each { |s| puts s }
puts "\n Duration: #{ending-starting} secs"


Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/8_D86-Av4aw/13263

 
Concurrent map
Friday, 17 June 2011 09:57
A simplified Map/Reduce that runs in a single process with multiple threads.
It seems useful when the generator and the map processing are I/O-heavy.


require 'thread'
require 'timeout'



######################## Parallel Map engine ######################################

# Minimal single-process, multi-threaded map/reduce.
#
# A generator proc fills a work queue, :nbThread worker threads run the mapper
# proc on every queued item, and #get_result hands everything the mappers
# pushed to the result queue to the reducer proc.
class ParallelMap
  # Starts the workers and the generator thread immediately.
  #
  # @param options [Hash]
  #   :generator (required) proc |queue|      -- pushes the work items
  #   :mapper    (required) proc |out, item|  -- pushes results to +out+
  #   :reducer   (optional) proc |results|    -- defaults to identity
  #   :nbThread  (optional) number of worker threads, defaults to 4
  # @raise [RuntimeError] when :generator or :mapper is missing
  def initialize(options)
    @nbThread = options[:nbThread] || 4
    @generator = options[:generator] || raise("missing generator proc")
    @mapper = options[:mapper] || raise("missing mapper proc")
    @reducer = options[:reducer] || proc { |r| r }
    @query = Queue.new
    @result = Queue.new
    @lthread = (1..@nbThread).map { |no| Thread.new { mapping(@query, @result, no) } }
    @th = Thread.new { generating }
  end

  # Body of the generator thread: produce the work, then stop the workers.
  def generating
    @generator.call(@query)
  ensure
    # Always queue one :eend sentinel per worker and wait for them, even when
    # the generator raised; otherwise the workers would stay blocked on the
    # queue forever. The generator's exception still propagates afterwards.
    @nbThread.times { @query << :eend }
    @lthread.each(&:join)
  end

  # Waits until all work is done and returns the reducer's value.
  # An exception raised by the generator is re-raised here by Thread#join.
  def get_result
    @th.join
    res = []
    res << @result.pop until @result.empty?
    @reducer.call(res)
  end

  # Body of worker thread number +no+: map items until the :eend sentinel.
  # A mapper failure does not stop the worker; it is reported as a
  # "<no> ERROR <item.inspect> <message>" string on the result queue.
  def mapping(queue, result, no)
    while (mess = queue.shift) != :eend
      begin
        @mapper.call(result, mess)
      rescue => e
        result << [no.to_s, "ERROR", mess.inspect, e.to_s].join(" ")
      end
    end
  end
end


And here is a use case, a recursive file grep:


############################# call the block for each matching file name
# Walks +root+ recursively and calls +blk+ with the path of every
# non-directory entry whose lower-cased base name matches the File.fnmatch
# pattern +filter+.
def rfind(root,filter,&blk)
  $nbfile = 0 # reset on every call (recursive ones included); not read in this snippet
  Dir.glob("#{root}/*").each do |path|
    name = File.basename(path)
    next if name =~ /^\.\.?$/

    if File.directory?(path)
      rfind(path, filter, &blk)
    elsif File.fnmatch(filter, name.downcase)
      blk.call(path)
    end
  end
end

####################### Map : grep on one file #####################

# Appends to +out+ one "file:NNNNNNNNN:line" entry for every line of +file+
# matched by +matcher+. NNNNNNNNN is the zero-based line index, zero-padded to
# nine digits; the line keeps its terminator.
def selectLine(out,matcher,file)
  File.open(file, "r") do |io|
    io.readlines.each_with_index do |text, index|
      next unless matcher =~ text

      out << format("%s:%09d:%s", file, index, text)
    end
  end
end

####################### Reduce : sort results by file name / line number

# Sorts the "file:NNNNNNNNN:line" entries and strips the zero padding of the
# line number (the padding only exists so that the string sort orders by line).
#
# - A line number of 0 is kept as "0" instead of becoming an empty field.
# - Entries without any ":" (for example the "<no> ERROR ..." strings pushed
#   by a failing worker) are passed through unchanged instead of raising.
#
# @param l [Array<String>] the collected entries
# @return [Array<String>] sorted, de-padded entries
def reduce(l)
  l.sort.map do |entry|
    file, rest = entry.split(":", 2)
    next entry if rest.nil?

    # Drop leading zeros but always leave at least one digit.
    "#{file}:#{rest.sub(/\A0+(?=\d)/, '')}"
  end
end

####################################################################################
# M A I N #
####################################################################################

# Command line: pgrep <regexp> <path> <file-filter>
#   regexp      regular expression every reported line must match
#   path        directory that is searched recursively
#   file-filter file extension, written either bare ("rb") or as "*.rb"
raise("Usage : > pgrep regexp path 'file-filter'") if ARGV.length != 3

query = /#{ARGV[0]}/
path = ARGV[1]
ext = ARGV[2]
# The usage text promises a file filter, so tolerate a leading "*." as well
# as a bare extension: "*.rb" used to become "*.*.rb" and match almost nothing.
filter = "*." + ext.downcase.sub(/\A\*\./, '')

starting = Time.now.to_f
result = ParallelMap.new(
  :nbThread => 5,
  :generator => proc { |res| rfind(path, filter) { |file| res << file } },
  :mapper => proc { |out, in_file_name| selectLine(out, query, in_file_name) },
  :reducer => proc { |rr| reduce(rr) }
).get_result()

ending = Time.now.to_f
result.each { |s| puts s }
puts "\n Duration: #{ending-starting} secs"


Read more: http://feeds.dzone.com/~r/dzone/snippets/~3/8_D86-Av4aw/13263

 
Start
Prev
1


Page 1 of 3
Taxonomy by Zaragoza Online