# 2012-12-31 13:27
# Tested with GNU make 3.82

# Recipes and $(shell ...) calls below use bash-only syntax (brace ranges,
# (( )) arithmetic), so bash is requested explicitly.
SHELL := /bin/bash
# Executables to build and profile; each NAME is compiled from NAME.c.
BINS  := val1 val2 val3 val4 val5 val6

# "all" (the default goal) builds every executable; "results" produces one
# profile report NAME.txt per executable.  Neither names a real file, so both
# are declared phony: a stray file called "all" or "results" cannot mask
# them, and make skips implicit-rule search (the match-anything "%: %.c"
# rule) for them.
.PHONY: all results
all: $(BINS)
results: $(addsuffix .txt,$(BINS))

# The following variables can be overridden from the command line,
# e.g., make PRF=gprof OPTLVL=-O1 ...

# Profiler whose recipes are used.  Recipes are provided for perf, operf,
# oprofile, gprof, perfx100 and gprofx100; add your own for anything else.
PRF := perf

# Compiler command; change it to select a specific compiler version.
GCCBIN := gcc

# Optimization level to compile with.  Sometimes anomalies occur only at
# higher levels of optimization, e.g.
# OPTLVL = -O3 -fno-ipa-cp-clone
OPTLVL := -O2

# Resolve proftool-specific options: choose CFLAGS according to PRF and, for
# the perf variants, also the --call-graph mode (GPARM) and the sampled event
# (PERFEVENT).
ifneq ($(filter perf perfx100,$(PRF)),)
  # Query the installed perf once.  Simple (:=) assignment stops each
  # $(shell ...) from being re-run every time one of these variables is
  # expanded.  The third space-separated field of "perf --version" is taken
  # as the version number (output presumably "perf version X.Y...").
  PERFVERSION := $(shell perf --version | cut -f 3 -d ' ')
  PERFMAJOR   := $(shell echo $(PERFVERSION) | cut -f 1 -d .)
  PERFMINOR   := $(shell echo $(PERFVERSION) | cut -f 2 -d .)
  # PERFVER is "old" for perf older than 3.7 and empty otherwise.  The
  # (( )) arithmetic test needs bash, which SHELL selects.
  PERFVER     := $(shell if (( $(PERFMAJOR)<3 || $(PERFMAJOR)==3 && $(PERFMINOR)<7 )) ; then echo old ; fi)
  ifeq ($(PERFVER),old)
    # Old perf: compile with frame pointers; GPARM is deliberately left
    # unset, so the recipe passes a bare --call-graph.
    CFLAGS = $(OPTLVL) -fno-omit-frame-pointer
  else
    # Newer perf: no frame pointers needed, request dwarf call graphs.
    CFLAGS = $(OPTLVL)
    GPARM = dwarf
  endif
  # To try PEBS, do PERFEVENT=cpu-cycles:pp
  PERFEVENT=cpu-cycles
else ifeq ($(PRF),operf)
  CFLAGS = $(OPTLVL) -fno-omit-frame-pointer
else ifeq ($(PRF),oprofile)
  CFLAGS = $(OPTLVL) -fno-omit-frame-pointer
else ifeq ($(findstring gprof,$(PRF)),gprof)
  # Matches both gprof and gprofx100.  Warn when OPTLVL contains anything
  # other than -O0 / -O1.
  ifneq ($(filter-out -O0 -O1,$(OPTLVL)),)
    $(warning WARNING: specify OPTLVL -O0 or -O1 with gprof)
  endif
  CFLAGS = $(OPTLVL) -pg
else
  # Your code here
endif

# Build a profilable executable NAME from NAME.c.  CFLAGS comes from the
# PRF-specific section above; ADDCFLAGS is not set in this file and is
# presumably meant to be supplied on the command line for extra flags.
%: %.c
	$(GCCBIN) -std=gnu99 -Wall -Wextra -pedantic \
	    $(CFLAGS) $(ADDCFLAGS) \
	    -o $@ $<

# Render a Graphviz .dot file as PDF, PNG or SVG.  The output goes to a
# temporary file that is renamed onto the target only when dot succeeds, so
# a failed run never leaves a truncated target that make would afterwards
# treat as up to date.
%.pdf: %.dot
	dot -Gmargin=0 -Tpdf $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

%.png: %.dot
	dot -Gmargin=0 -Tpng $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

%.svg: %.dot
	dot -Gmargin=0 -Tsvg $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Add to recipes or command line to get dot graph conversions:
#	make $<.pdf $<.png


# Notes / cautions on operf (0.9.9):

#   Operf reports spurious recursion.  A patch to fix this has been shared on
#   the mailing list but is not included in the 0.9.9 release.

# Notes / cautions on opcontrol (0.9.8):

#   In addition to allowing data collection for standard library and kernel
#   functions, the --separate=kernel option on opcontrol somehow works around
#   the anomaly in which only about 1 % of collected samples are used in the
#   calculation of self times.

#   The real surprise is that this option is STICKY.  It is saved in
#   ~root/.oprofile/daemonrc and persists from run to run until explicitly
#   changed.  So if it is used once, the 1 % samples anomaly "mysteriously"
#   becomes irreproducible, unless and until the opposite option
#   (--separate=none) is specified on some run.

# Notes / cautions on perf (3.12.6):

#   -g dwarf no longer parses (you have to say --call-graph dwarf).
#   -g with no argument still works, but it defaults to fp!
#   --call-graph has been legal syntax since way back, so just use that.

#   Also see ReleaseNotes about perf_cpu_time_max_percent.

# Specify how to generate a profile result given an executable.
# "make results" demands the .txt file; anything else is optional, and not
# every profiler branch writes NAME.dot.  The branch is chosen by PRF when
# the makefile is parsed.  Raw profile data is kept in NAME_data ($< is the
# executable NAME).
%.txt %.dot: %
ifeq ($(PRF),perf)
# GPARM is empty for perf older than 3.7, leaving a bare --call-graph.
	perf record -e $(PERFEVENT) -c 1000000 -o $<_data --call-graph $(GPARM) ./$<
	perf report -i $<_data > $<.txt
	perf script -i $<_data > $<_script.txt
# Two call graphs from the same script dump, differing in --total mode.
	gprof2dot.py -w -f perf --total=callratios -o $<_heuristic.dot < $<_script.txt
	gprof2dot.py -w -f perf --total=callstacks -o $<_stacks.dot < $<_script.txt
else ifeq ($(PRF),operf)
# -p: do not fail when an earlier run left the session directory behind.
	mkdir -p $<_data
	operf --lazy-conversion --vmlinux /usr/src/linux/vmlinux -e CPU_CLK_UNHALTED:1000000 -d $<_data -g ./$<
	opreport --session-dir=$<_data -c -o $<.txt
	opreport --session-dir=$<_data -cgf | gprof2dot.py -w -f oprofile -o $<.dot
else ifeq ($(PRF),oprofile)
	# Must run as root for old style OProfile.
	opcontrol --init
	opcontrol --session-dir=$(CURDIR)/$<_data --vmlinux=/usr/src/linux/vmlinux --image=./$< --event="CPU_CLK_UNHALTED:1000000" --callgraph=5 --separate=kernel --start
	./$<
	opcontrol --shutdown
	opreport --session-dir=$<_data -c -o $<.txt
	gprof2dot.py -w -f oprofile -o $<.dot < $<.txt
else ifeq ($(PRF),gprof)
# The run writes gmon.out into the current directory; keep it as NAME_data.
	./$<
	mv gmon.out $<_data
	gprof $< $<_data > $<.txt
	gprof2dot.py -w -f prof -o $<.dot < $<.txt
else ifeq ($(PRF),perfx100)
	# No callchains for this.
	# The -A option of perf record causes reporting errors, so we use
	# our own script to process the dump.
# Start from an empty dump: the loop below appends, and samples left over
# from an earlier run must not be summed into this one.
	rm -f $<_script.txt
	for looper in {1..100}; do echo $${looper}; perf record -e $(PERFEVENT) -c 1000000 -o $<_data ./$<; perf script -i $<_data -f ip,sym,dso >> $<_script.txt; rm $<_data; done
	util/perf-sum-selftime.rb $<_script.txt > $<.txt
else ifeq ($(PRF),gprofx100)
# The first run seeds gmon.sum; runs 2..100 are merged into it by gprof -s.
	./$<
	mv gmon.out gmon.sum
	for looper in {2..100}; do ./$<; gprof -s ./$< gmon.out gmon.sum; done
	rm gmon.out
	mv gmon.sum $<_data
	gprof ./$< $<_data > $<.txt
	gprof2dot.py -w -f prof -o $<.dot < $<.txt
else
	# Your code here
endif

# Specify what gets deleted.  "resultsclean" removes profile data, reports
# and dot graphs of the val? programs; "clean" additionally removes the
# executables.  Both are commands rather than files, so they are declared
# phony and still run when a file named "clean" or "resultsclean" exists.
.PHONY: clean resultsclean
clean: resultsclean
	rm -f $(BINS)

resultsclean:
	rm -rf val?_data
	rm -f val?.txt val?_script.txt val?.dot val?_heuristic.dot val?_stacks.dot
	# Your code here

# Produce disassemblies on request (make NAME_objdump.txt).  The listing is
# written to a temporary file and renamed only when objdump succeeds, so a
# failed run cannot leave a truncated listing that make considers up to date.
%_objdump.txt: %
	objdump --disassemble --disassembler-options=x86-64 --no-show-raw-insn $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Preprocess only (-E): "make NAME.i" writes the preprocessed form of NAME.c
# using the same -std, CFLAGS and ADDCFLAGS as the compile rule.
%.i: %.c
	$(GCCBIN) -std=gnu99 \
	    $(CFLAGS) $(ADDCFLAGS) \
	    -E -o $@ $<

# For val1 anomaly experiment, no callchains.  Each invocation profiles val1
# once, appends (>>) the flat report to val1_<profiler>.txt and discards the
# raw data, so repeated runs accumulate in one file.  Only perf, operf,
# oprofile and gprof are handled; for any other PRF the recipe is empty.
# val1test is a command, not a file, hence phony.
.PHONY: val1test
val1test: val1
ifeq ($(PRF),perf)
	perf record -e $(PERFEVENT) -c 1000000 -o val1_data ./val1
	perf script -i val1_data -f ip,sym,dso >> val1_perf.txt
	rm val1_data
else ifeq ($(PRF),operf)
# -p: do not fail when an earlier (possibly aborted) run left val1_data behind.
	mkdir -p val1_data
	operf --lazy-conversion --vmlinux /usr/src/linux/vmlinux -e CPU_CLK_UNHALTED:1000000 -d val1_data ./val1
	opreport --session-dir=val1_data -l >> val1_operf.txt
	rm -rf val1_data
else ifeq ($(PRF),oprofile)
	opcontrol --init
	opcontrol --session-dir=$(CURDIR)/val1_data --vmlinux=/usr/src/linux/vmlinux --event="CPU_CLK_UNHALTED:1000000" --callgraph=0 --separate=kernel --start
	./val1
	opcontrol --shutdown
	opreport --session-dir=val1_data -l >> val1_oprofile.txt
	rm -rf val1_data
else ifeq ($(PRF),gprof)
# gprof reads gmon.out from the current directory by default.
	./val1
	gprof -b --no-graph val1 >> val1_gprof.txt
	rm gmon.out
endif
