# 2013-04-29 17:02
# Last tested with MS nmake v11.0 from Visual Studio 2012.

# *** IMPORTANT USAGE NOTE ***
# Surprisingly for those accustomed to GNU make, nmake executes only one
# level of inference rules.  To get from val1.c to val1.txt, it is not
# sufficient to say nmake val1.txt.  One must instead say:
#   nmake val1.exe && nmake val1.data && nmake val1.txt
# or
#   nmake && nmake alldata && nmake alltxt
# This can be avoided by duplicating code in the makefile, but that has its
# own negative consequences.

# Register the file extensions used by the inference rules in this file
# (.c.exe, .c.i, .exe.fail, .exe.data, .data.txt, .txt.dot) so that nmake
# will consider those rules.
.SUFFIXES: .c .i .exe .data .txt .dot .fail

# Aggregate pseudotargets, one per pipeline stage, covering the six test
# programs val1 through val6.  allexe is the first target in the file, so a
# bare "nmake" builds it (see the usage note at the top of the file).
# None of these has commands of its own; each stage is produced by the
# matching inference rule.
allexe:  val1.exe val2.exe val3.exe val4.exe val5.exe val6.exe
alldata: val1.data val2.data val3.data val4.data val5.data val6.data
alltxt:  val1.txt val2.txt val3.txt val4.txt val5.txt val6.txt
alldot:  val1.dot val2.dot val3.dot val4.dot val5.dot val6.dot

# Compiler switches:
#   /O2 optimization and 64-bit build are enabled by default.
#   /Zi says to put debug info in .pdb file (but that fails).
#   /Z7 puts it in the obj file in an older format.
#   /Z7 implies /Od (disable optimizations) unless another optimization is
#   explicitly set.
#   /link /DEBUG passes /DEBUG to MS link.
# Although inlining is prevented by pragmas in the source code, sibling call
# optimization (in GCC's terminology) continues to confound the call stacks
# even at /O1.  A switch to turn it off selectively has not been found.
# Compile and link one C source ($<) straight to an executable.
# ADDCFLAGS and OPTLVL are not defined in the visible part of this file;
# presumably they are supplied on the nmake command line or through the
# environment (TODO confirm).  An undefined macro expands to nothing.
# Everything after /link is handed to the linker, so it must stay last.
.c.exe:
	icl $< $(ADDCFLAGS) /Qstd=c99 /Z7 $(OPTLVL) /link /DEBUG

# Using /debug:inline-debug-info with icl and -inline-mode=on with amplxe-cl
# produces only the first level of inlines.  See the release notes for VTune
# Amplifier XE 2013 for Windows Update 5 (2013-02-21):  "Intel Compiler only
# produces first level of inlines.  The nested inlines are not emitted into
# the debug information.  (200164310)"

# Remove the result directories val1.data through val6.data (the directories
# that the .exe.data and .exe.fail rules pass to amplxe-cl as -result-dir).
# Each removal is guarded by "if exist", so a directory that is already gone
# is skipped instead of producing an rmdir error.
# rmdir /S removes the whole tree; /Q suppresses the confirmation prompt.
dataclean:
	if exist val1.data rmdir /Q /S val1.data
	if exist val2.data rmdir /Q /S val2.data
	if exist val3.data rmdir /Q /S val3.data
	if exist val4.data rmdir /Q /S val4.data
	if exist val5.data rmdir /Q /S val5.data
	if exist val6.data rmdir /Q /S val6.data

# Remove everything this makefile generates: the result directories (via
# dataclean), the build products, the preprocessed sources written by the
# .c.i rule (val?.i), and the reports (val?.txt) and graphs (val?.dot).
# The ? wildcard matches the single digit in val1 .. val6.
clean: dataclean
	del val?.exe val?.ilk val?.obj val?.pdb val?.i val?.txt val?.dot

# Like clean, but keeps the reports (val?.txt) and graphs (val?.dot):
# removes the result directories (via dataclean) and the build products only.
semiclean: dataclean
	del val?.exe val?.ilk val?.obj val?.pdb

# Produce preprocessed source on request.
# /P asks the compiler to write the preprocessed source to a file instead of
# compiling (presumably <name>.i, matching this rule's target).
# NOTE(review): unlike the .c.exe rule, $(ADDCFLAGS) is not passed here, so
# any preprocessor definitions supplied through it would not be reflected in
# the output -- confirm whether that is intended.
.c.i:
	icl $< /Qstd=c99 /P

# Collect data and report.
#
# Core 2 CPUs have CPU_CLK_UNHALTED.CORE not CPU_CLK_UNHALTED.THREAD.
# On Core i7 and Sandy Bridge CPUs it's the opposite.
# Bloomfield/Nehalem are i7.

# As of Amplifier XE 2013 Update 6, command-line reporting for runsa with
# call stacks is not working:  http://software.intel.com/en-us/forums/topic/368851
# To reproduce the problem:  nmake clean && nmake val1.exe && nmake val1.fail
# Event CPU_CLK_UNHALTED.THREAD_P is specified to work around a problem where
# the sample count using the fixed counter was way too low.  This may be the
# issue "Event-based collection on fixed counters with stacks collection
# enabled fails on some CPUs (200179695)" (see release notes).
# Default sa for the fixed counter is 2000000.
# Reproduction recipe for the reporting failure described above.
# $* is the target name without its extension, so for val1.fail the result
# directory is val1.data -- the same directory the .exe.data rule uses.
# Step 1 collects with runsa, stack collection on, sampling
# CPU_CLK_UNHALTED.THREAD_P with sa=1000000; step 2 requests the gprof-cc
# report from that result directory.
# No command here names a .fail file as its output, so the target is
# presumably never created and the rule runs on every request.
.exe.fail:
	amplxe-cl -collect-with runsa -knob enable-stack-collection=true -knob event-config=CPU_CLK_UNHALTED.THREAD_P:sa=1000000 -result-dir=$*.data -- $<
	amplxe-cl -report gprof-cc -result-dir=$*.data

# The substitute for runsa, lightweight-hotspots, supports neither
# sampling-interval nor event-config.
# Collect a lightweight-hotspots profile, with call stacks, of the
# executable $< into the result directory $@.
# A stale result directory is removed first: nmake re-runs this rule when
# the .exe is newer than an existing .data directory, and the collection
# would otherwise be pointed at a directory that already holds a result.
# The guarded rmdir is the same idiom the dataclean target uses.
.exe.data:
	if exist $@ rmdir /Q /S $@
	amplxe-cl -collect lightweight-hotspots -knob enable-stack-collection=true -result-dir=$@ -- $<

# The following options have no effect on gprof-cc formatted output:
#   -knob enable-call-counts=true
#   -show-as=samples
# Produce the gprof-cc report: $< is the result directory to read and $@ is
# the text file the report is written to.
.data.txt:
	amplxe-cl -report gprof-cc -result-dir=$< -report-output=$@

# The following Python script invocation is untested on Windows (copied the
# data back and ran on Linux).
# Works for gprof2dot.py rev. 2013-04-09 16:53.
# Convert one report to a graph: the report $< is fed to the script on
# standard input and the output is written to $@ via -o.
# Invoking gprof2dot.py by bare name presumably relies on .py files being
# associated with a Python interpreter (see "untested on Windows" above).
.txt.dot:
	gprof2dot.py -w -f axe -o $@ < $<
# "nmake alldot" fails if ancestors have been deleted.  forcedot instead runs
# the conversion unconditionally on the existing val?.txt files.
# Shared command prefix for the forcedot conversions below.
G2D_AXE = gprof2dot.py -w -f axe

# Regenerate every val?.dot directly from its val?.txt, with no dependency
# checking: the target has no dependents, so all six conversions always run.
forcedot:
	$(G2D_AXE) -o val1.dot < val1.txt
	$(G2D_AXE) -o val2.dot < val2.txt
	$(G2D_AXE) -o val3.dot < val3.txt
	$(G2D_AXE) -o val4.dot < val4.txt
	$(G2D_AXE) -o val5.dot < val5.txt
	$(G2D_AXE) -o val6.dot < val6.txt

# For val1 anomaly experiment, no callchains.
# Report type hotspots still fails with this:
#   -collect-with runsa -knob enable-stack-collection=false
# "This result does not have data that can be displayed with the following
# specified 'group-by' argument"
# One-off experiment on val1.exe with stack collection turned off.
#   1. Collect lightweight-hotspots data into val1.data.
#   2. Append (>>) the hotspots report to val1.txt; anything already in
#      val1.txt is kept.
#   3. Delete val1.data, so the collected data does not outlive this target.
# NOTE(review): the first step presumably fails if val1.data is left over
# from an earlier "nmake val1.data" -- run "nmake dataclean" first if so.
val1test:
	amplxe-cl -collect lightweight-hotspots -knob enable-stack-collection=false -result-dir=val1.data -- val1.exe
	amplxe-cl -report hotspots -result-dir=val1.data >> val1.txt
	rmdir /Q /S val1.data
