#234 WideFinder-2 in Fan (version 1)

brian Wed 4 Jun 2008

I've prototyped a Fan implementation of Tim Bray's Wide Finder 2 project. This first version is a simple single-threaded port of Tim's Ruby code. As one might expect of a Java/Ruby offspring, Fan's code looks pretty much the same except with type annotations.

Running this script on my PC with Server HotSpot on the 100k data set averages about 4.5sec versus about 7.2sec for Ruby 1.8.3 (not a huge difference). Timing shows that the report computation is trivial (20 to 30ms) - so the bulk of time is in the IO loop.

**
** WideFinder2 Version 1 - Single threaded
**
** Reads a web server access log (path given as the first command line
** argument), tallies per-URI hits and bytes, 404s, client addresses and
** external referrers, then prints a "top 10" report for each tally.
**
class WideFinder
{
  // Tallies keyed by URI / client / referrer; missing keys read as 0
  Str:Int hits    := Str:Int[:] { def = 0 }
  Str:Int bytes   := Str:Int[:] { def = 0 }
  Str:Int s404s   := Str:Int[:] { def = 0 }
  Str:Int clients := Str:Int[:] { def = 0 }
  Str:Int refs    := Str:Int[:] { def = 0 }

  // URIs which count as an article "hit"
  Regex re := Regex.fromStr(r"^/ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+$")

  **
  ** Record one successful fetch.  Bytes are always charged to the URI;
  ** hits, clients and referrers are only counted when the URI matches
  ** the article regex.  'ref' is the referrer token including its
  ** surrounding double quotes.
  **
  Void record(Str client, Str u, Int size, Str ref)
  {
    bytes[u] += size
    if (!re.matches(u)) return
    hits[u]++
    clients[client]++
    if (ref != "\"-\"" && !ref.contains("http://www.tbray.org/ongoing/"))
      refs[ref[1..-2]]++  // lose the quotes
  }

  **
  ** Print 'label' followed by at most ten entries of 'map' with the
  ** largest values, in descending order, then a blank line.  When
  ** 'isBytes' is true values are printed in megabytes.  A map with
  ** fewer than ten entries (including an empty one) prints what it has.
  **
  Void report(Str label, Str:Int map, Bool isBytes := false)
  {
    echo(label)

    counts := map.values.sortr
    if (!counts.isEmpty)
    {
      // value of the 10th largest entry, or of the smallest entry
      // when there are fewer than ten - never index past the end
      cutoff    := counts.size < 10 ? counts.size - 1 : 9
      threshold := counts[cutoff]

      // only sort the handful of candidates rather than the whole map
      top    := map.findAll |Int v->Bool| { return v >= threshold }
      ranked := top.keys.sortr |Str a, Str b->Int| { return top[a] <=> top[b] }

      // ties at the threshold can leave more than ten candidates
      topKeys := ranked.size > 10 ? ranked[0..9] : ranked

      topKeys.each |Str key|
      {
        pkey := key.size > 60 ? key[0 .. 59] + "..." : key
        val  := top[key]
        if (isBytes) val /= 1024*1024
        echo("  ${val.toStr.justr(5)} $pkey")
      }
    }

    echo("")
  }

  **
  ** Parse the log file named by the first argument line by line, then
  ** print all reports and the elapsed time.
  **
  Void main()
  {
    if (Sys.args.isEmpty)
    {
      echo("usage: WideFinder <logfile>")
      return
    }

    t1 := Duration.now
    Sys.args[0].toUri.toFile.eachLine |Str line|
    {
      toks := line.split(" ")

      // skip blank or truncated lines instead of failing on toks[n]
      if (toks.size < 11) return
      if (toks[5] != "\"GET") return

      client := toks[0]; u := toks[6]; status := toks[8]
      sizeTok := toks[9]; ref := toks[10]

      if (status == "200")
      {
        // the size column is "-" when no body bytes were sent
        size := sizeTok == "-" ? 0 : sizeTok.toInt
        record(client, u, size, ref)
      }
      else if (status == "304") record(client, u, 0, ref)
      else if (status == "404") s404s[u]++
    }
    report("Top URIs by hit", hits)
    report("Top URIs by Megabytes", bytes, true)
    report("Top 404s", s404s)
    report("Top client addresses", clients)
    report("Top referrers", refs)
    t2 := Duration.now
    echo("Time: ${(t2-t1).toMillis}ms")
  }
}

Login or Signup to reply.