From 0bc5cd38d4387e2624b8c67db0b5e282fd486421 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 Nov 2014 13:02:01 -0800
Subject: Revised benchmark procedure.

Now we take the difference of the time to process the input
and the time to run with no input.  This compensates for
slow startup time in dynamic languages.

See comments on 2dcef8a
---
 Makefile       |  1 +
 bench/stats.py | 10 +++++++++-
 benchmarks.md  | 29 ++++++++++++++++++-----------
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile
index cb8c687..4f8b15b 100644
--- a/Makefile
+++ b/Makefile
@@ -127,6 +127,7 @@ progit/progit.md: progit
 bench: $(BENCHFILE)
 	{ sudo renice 99 $$$$; \
 	  for x in `seq 1 $(NUMRUNS)` ; do \
+	  /usr/bin/env time -p ${PROG} </dev/null >/dev/null ; \
 	  /usr/bin/env time -p ${PROG} $< >/dev/null ; \
 		  done \
 	} 2>&1  | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py'
diff --git a/bench/stats.py b/bench/stats.py
index 3298099..c244b41 100644
--- a/bench/stats.py
+++ b/bench/stats.py
@@ -3,7 +3,15 @@
 import sys
 import statistics
 
-values = [ float(x) for x in sys.stdin.readlines()]
+def pairs(l, n):
+        return zip(*[l[i::n] for i in range(n)])
+
+# data comes in pairs:
+#    n - time for running the program with no input
+#    m - time for running it with the benchmark input
+# we measure (m - n)
+
+values = [ float(y) - float(x) for (x,y) in pairs(sys.stdin.readlines(),2)]
 
 print("mean = %.4f, median = %.4f, stdev = %.4f" %
     (statistics.mean(values), statistics.median(values),
diff --git a/benchmarks.md b/benchmarks.md
index 3010091..574daf5 100644
--- a/benchmarks.md
+++ b/benchmarks.md
@@ -4,23 +4,30 @@ Some benchmarks, run on an ancient Thinkpad running Intel Core 2 Duo at 2GHz.
 
 |Implementation     |  Time (sec)| Factor  |
 |-------------------|-----------:|--------:|
-| Markdown.pl       | 2921.30    | 14606.5 |
-| kramdown          | 20.76      |   103.8 |
-| PHP markdown      | 20.475     |   102.4 |
-| lunamark          | 6.195      |    40.0 |
-| cheapskate        | 5.645      |    28.2 |
-| peg-markdown      | 5.330      |    26.7 |
-| **commonmark.js** | 2.730      |    13.6 |
-| marked            | 1.870      |     9.4 |
-| discount          | 1.660      |     8.3 |
-| **cmark**         | 0.320      |     1.6 |
+| Markdown.pl       | 2921.24    | 14606.2 |
+| PHP markdown      | 20.85      |   104.3 |
+| kramdown          | 20.83      |   104.1 |
+| lunamark          | 6.295      |    31.5 |
+| cheapskate        | 5.760      |    28.8 |
+| peg-markdown      | 5.450      |    27.3 |
+| **commonmark.js** | 2.675      |    13.4 |
+| marked            | 1.855      |     9.3 |
+| discount          | 1.705      |     8.5 |
+| **cmark**         | 0.315      |     1.6 |
 | sundown           | 0.200      |     1.0 |
 
+
 To run these benchmarks, use `make bench PROG=/path/to/program`.
 
 The input text is a 10MB Markdown file built by concatenating 20 copies
 of the Markdown source of the first edition of [*Pro
 Git*](https://github.com/progit/progit/tree/master/en) by Scott Chacon.
 
-`time` is used to measure execution speed.
+`time` is used to measure execution speed.  The reported
+time is the *difference* between the time to run the program
+with the benchmark input and the time to run it with no input.
+(This procedure ensures that implementations in dynamic languages are
+not penalized by startup time.) A median of ten runs is taken.  The
+process is reniced to a high priority so that the system doesn't
+interrupt runs.
 
-- 
cgit v1.2.3