|
Monday, 02 May 2011 02:03 |
// library for working with delimiter-separated values files
module Dsv
open System
open System.IO
// An F# mutually-tail-recursive CSV record parser.
// See the spec at http://creativyst.com/Doc/Articles/CSV/CSV01.htm
let rec csvrecord sep (tr:#TextReader) record (line:string) i =
if i = line.Length then record @ [""]
else
match line.[i] with
| '"' -> csvfield sep tr record "" line (i+1)
| ' ' | '\t' -> csvrecord sep tr record line (i+1)
| c when c = sep -> csvrecord sep tr (record @ [""]) line (i+1)
| '=' when line.[i+1] = '"' -> csvfield sep tr record "" line (i+2) // Excel compatibility
| _ -> // unquoted field data
let fs = line.IndexOf(sep,i)
if fs = -1 then record @ [line.Substring(i).TrimEnd()]
else
csvrecord sep tr (record @ [line.Substring(i,fs-i).TrimEnd()]) line (fs+1)
and csvfield sep (tr:#TextReader) record field (line:string) i =
if i = line.Length then csvfield sep tr record (field+"\n") (tr.ReadLine()) 0
elif line.[i] <> '"' then
let q = line.IndexOf('"',i)
if q = -1 then csvfield sep tr record (field+line.Substring(i)+"\n") (tr.ReadLine()) 0
else csvfield sep tr record (field+line.Substring(i,q-i)) line q
elif i = line.Length-1 then record @ [field]
elif line.[i+1] = '"' then csvfield sep tr record (field+"\"") line (i+2)
elif line.[i+1] = sep then csvrecord sep tr (record @ [field]) line (i+2)
else // not an escaped quote and not end of field; try to recover by appending trimmed unquoted field data
let fs = line.IndexOf(sep,i+1)
if fs = -1 then record @ [field+line.Substring(i).TrimEnd()]
else csvrecord sep tr (record @ [field+line.Substring(i,fs-i).TrimEnd()]) line (fs+1)
let csvrows sep (filepath:string) = seq {
use sr = new StreamReader(filepath)
while not sr.EndOfStream do
yield csvrecord sep sr [] (sr.ReadLine()) 0
}
let csvrecords sep (filepath:string) = seq {
let lists = csvrows sep filepath
let headers = Seq.head lists
for vals in (Seq.skip 1 lists) do
yield List.zip headers vals |> Map.ofList
}
let tsvrows (sep:char) (filepath:string) = seq {
use sr = new StreamReader(filepath)
while not sr.EndOfStream do
yield sr.ReadLine().Split(sep) |> List.ofArray
}
let rows (filepath:string) =
let filename = Path.GetFileName(filepath)
match Path.GetExtension(filename).ToLower() with
| ".csv" -> csvrows ',' filepath
| ".tsv" | ".tab" -> tsvrows '\t' filepath
| ".skv" -> csvrows ';' filepath
| ".psv" -> tsvrows '|' filepath
| ".log" when filename.StartsWith("ex") || filename.StartsWith("u_ex") -> tsvrows ' ' filepath
| _ ->
use sr = new StreamReader(filepath)
let line = sr.ReadLine()
let sep = ['\t';'|';';';','] |> List.sortBy (fun c -> -line.Split(c).Length) |> List.head
if sep = ',' || sep = ';' then
csvrows sep filepath
else
tsvrows sep filepath
let headersFrom (filepath:string) =
Seq.head (rows filepath)
let records (filepath:string) = seq {
let lists = rows filepath
let headers = Seq.head lists
for vals in (Seq.skip 1 lists) do
yield List.zip headers vals |> Map.ofList
}
let listOfFileLines filepath =
File.ReadAllLines(filepath) |> List.ofArray
let setOfFileLines filepath =
File.ReadAllLines(filepath) |> Set.ofArray
let listOfColnum filepath colnum =
rows filepath |> Seq.map (fun x -> x.[colnum]) |> List.ofSeq
let setOfColnum filepath colnum =
rows filepath |> Seq.map (fun x -> x.[colnum]) |> Set.ofSeq
let listOfColumn filepath column =
records filepath |> Seq.map (fun x -> x.[column]) |> List.ofSeq
let setOfColumn filepath column =
records filepath |> Seq.map (fun x -> x.[column]) |> Set.ofSeq
let mapOfColnums filepath keycolnum valcolnum =
rows filepath |> Seq.map (fun x -> (x.[keycolnum],x.[valcolnum])) |> Map.ofSeq
let mapOfColumns filepath keycol valcol =
records filepath |> Seq.map (fun x -> (x.[keycol],x.[valcol])) |> Map.ofSeq
let countOfColnum filepath colnum =
listOfColnum filepath colnum |> Seq.countBy (fun x -> x) |> Seq.sortBy (fun (v,c) -> -c)
let countOfColumn filepath column =
listOfColumn filepath column |> Seq.countBy (fun x -> x) |> Seq.sortBy (fun (v,c) -> -c)
let showCountOfColnum filepath colnum =
listOfColnum filepath colnum |> Seq.countBy (fun x -> x) |> Seq.sortBy (fun (v,c) -> -c) |> Seq.iter (fun (v,c) -> printfn "%5d %s" c v)
let showCountOfColumn filepath column =
listOfColumn filepath column |> Seq.countBy (fun x -> x) |> Seq.sortBy (fun (v,c) -> -c) |> Seq.iter (fun (v,c) -> printfn "%5d %s" c v)
let showRowsAsList filepath =
let rs = rows filepath |> Seq.take 3 |> Seq.toList
let fmtfld (f:string) = f.Replace('n',' ').PadRight(20).Substring(0,20)
List.iteri (fun i _ -> printfn "%s" (List.fold (fun c (r:string list) -> c + " " + (fmtfld r.[i])) ((string i).PadLeft(3)) rs)) rs.[0]
let writeSet filepath set =
File.WriteAllLines(filepath,(Set.toArray set))
let writeRow sep (tw:#TextWriter) record =
let separator = sep.ToString()
tw.WriteLine(List.reduce (fun (x:string) y -> x + separator + y) record)
let writeCsvRows sep (tw:#TextWriter) record =
let quotefield sep (field:string) =
if field.IndexOfAny([|sep;'"';'n'|]) <> -1 || field <> field.Trim() then
"\"" + field.Replace("\"","\"""""\"") + "\""
else
field
writeRow sep tw (List.map (quotefield sep) record)
let writeXlRow (tw:#TextWriter) record =
let xlquotefield (field:string) =
if field <> field.Trim() then
"="" + field.Replace("\"","\"""""\"") + "\""
elif Array.forall (fun c -> Char.IsDigit(c)) (field.ToCharArray()) then
if field.Length > 15 || field.StartsWith("0") then "="" + field + "\""
else field
elif field.IndexOfAny([|',';'"';'n'|]) <> -1 then
"\"" + field.Replace("\"","\"""""\"") + "\""
else
field
writeRow ',' tw (List.map xlquotefield record)
let writeRows (filepath:string) lists =
use sw = new StreamWriter(filepath)
if filepath.EndsWith(".xl.csv") then
Seq.iter (writeXlRow sw) lists
else
match Path.GetExtension(filepath) with
| ".csv" -> Seq.iter (writeCsvRows ',' sw) lists
| ".skv" -> Seq.iter (writeCsvRows ';' sw) lists
| ".psv" -> Seq.iter (writeRow '|' sw) lists
| ".tsv" | ".tab" | _ -> Seq.iter (writeRow '\t' sw) lists
let writeRecords (filepath:string) headers maps =
let recordvals (map:Map) =
List.map (fun header -> map.[header]) headers
writeRows filepath (Seq.append (seq[headers]) (Seq.map recordvals maps))
let writeMapJoin (filepath:string) (keyfield:string) (newcolname:string) (map:Map) =
let extprefix f p = Path.ChangeExtension(f,p+Path.GetExtension(f))
let joins (record:Map) = map.ContainsKey(record.[keyfield])
let joined, unjoined = records filepath |> List.ofSeq |> List.partition joins
let join (record:Map) = record.Add(newcolname,map.[record.[keyfield]])
List.map join joined |> writeRecords (extprefix filepath newcolname) ((headersFrom filepath) @ [newcolname])
writeRecords (extprefix filepath "unmatched") (headersFrom filepath) unjoined
let writeFileJoin filepath keyfield mapfile keycol valcol =
writeMapJoin filepath keyfield valcol (mapOfColumns mapfile keycol valcol)
let rowsKeysDiffExclude filepath1 filepath2 (keys:Set) (exclude:Set) =
let keyvals (row:string list) = Set.map (fun i -> row.[i]) keys |> Set.toList
let load = rows >> Seq.map (fun (r:string list) -> keyvals r, r) >> Map.ofSeq
let f1, f2 = load filepath1, load filepath2
let both, f1only = Map.partition (fun k r -> f2.ContainsKey(k)) f1
let f2only = Map.filter (fun k r -> not(f1.ContainsKey(k))) f2
let rec diffrow (rowA:string list) (rowB:string list) i diffs =
if i < rowA.Length then
if rowA.[i] <> rowB.[i] && not(exclude.Contains(i)) then
diffrow rowA rowB (i+1) ((keyvals rowA @ [string (i+1);rowA.[i];rowB.[i]]) :: diffs)
else
diffrow rowA rowB (i+1) diffs
else
diffs
let rowdiffs = Map.toList both |> List.collect (fun (k,v) -> diffrow v f2.[k] 0 [])
f1only, f2only, rowdiffs
let rowsKeyDiff filepath1 filepath2 key =
rowsKeysDiffExclude filepath1 filepath2 (set [key]) Set.empty
let recordsKeysDiffExclude filepath1 filepath2 (keys:Set) (exclude:Set) =
let keyvals (record:Map) = Set.map (fun c -> record.[c]) keys |> Set.toList
let load = records >> Seq.map (fun (r:Map) -> keyvals r, r) >> Map.ofSeq
let f1, f2 = load filepath1, load filepath2
let both, f1only = Map.partition (fun k r -> f2.ContainsKey(k)) f1
let f2only = Map.filter (fun k r -> not(f1.ContainsKey(k))) f2
let rec diffrecord (recordA:Map) (recordB:Map) diffs = function
| [] -> diffs
| c :: ctail when exclude.Contains(c) || recordA.[c] = recordB.[c] -> diffrecord recordA recordB diffs ctail
| c :: ctail -> diffrecord recordA recordB ((keyvals recordA @ [c;recordA.[c];recordB.[c]]) :: diffs) ctail
let colnames = Map.fold (fun ks k _ -> k :: ks) [] (Map.pick (fun _ v -> Some(v)) both)
let recorddiffs = Map.toList both |> List.collect (fun (k,v) -> diffrecord v f2.[k] [] colnames)
f1only, f2only, recorddiffs
let recordsKeyDiff filepath1 filepath2 key =
recordsKeysDiffExclude filepath1 filepath2 (set [key]) Set.empty
 Read more: |
|
|
Thursday, 14 October 2010 09:31 |
Let's have an in-depth look at mechanics that the Rails framework for Ruby uses to render a list of records from a database table.
./app/controllers/records_controller.rb
class RecordsController < ApplicationController
def index
@records = Record.find( :all, :limit=>100 )
respond_to do |format|
format.html # GET /records
format.xml{ render( :xml )=>@records } # GET /records.xml
end
end
end
Ruby scaffold generator calls the listing method "index". The Controller takes desired @records from database so it can be used by the Viewer. In the following simple example we don't bother about pagination and the output is limited to the first 100 records.
You can pass an :offset parameter to find() to paginate output ( defining :limit=>records_per_page and :offset=>page_number*records_per_page )
./app/views/layouts/records.html.erb
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
record | tag
<%= stylesheet_link_tag 'scaffold' %>
<%= notice %>
<%= yield %>
The first call the Viewer does is to the page layout. Focus your attention to these two lines:
1. <%= stylesheet_link_tag 'scaffold' %>
references a common stylesheet for all controller named ./public/stylesheets/scaffold.css
2. <%= yield %> is where goes the output of the view corresponding to the controller method, ./app/views/records/index.html.erb in this case.
Let's have a look to these two files:
./public/stylesheets/scaffold.css
/* Design 6b */
table.Design6b {
border-spacing: 0px;
background: #E9E9F3;
border: 0.5em solid #E9E9F3;
}
table.Design6b thead th {
text-align: center;
font-weight: bold;
color: #6C6C9A;
border: 1px solid #9999AA;
}
table.Design6b tr.Odd {
background: #F6F4E4;
}
Notice the tag "Design6b" and "Odd", they are used for the html "class" attribute in the following two files:
./app/views/records/index.html.erb
Listing records
<%= render(
:partial=>'attributes',
:locals=>{ :record=>@record }
) %>
<% @records.each do |record| %>
<%= render(
:partial=>'each',
:locals=>{ :record=>record }
) %>
<% end %>
This View renders the index action using two "partial page templates", the first for table headers (record field names) and the second for every record. Let's have a look to them:
./app/views/records/_attributes.html.erb
<% for column in Record.content_columns %>
<%= h(record.send(column.name)) %> |
<% end %>
./app/views/records/_each.html.erb
<% for column in Record.content_columns %>
<%= column.human_name %> |
<% end %>
Those are all the fundamentals to do simple table record listing in Rails. Read more: |
|