#!/usr/bin/perl

use warnings;
use strict;

# ResolveQueries.pl
# Author: James Mayfield (james "dot" mayfield "at" jhuapl.edu)
my $version = "1.0";

binmode(STDOUT, ":utf8");

# This program takes input from a TAC Cold Start query file and a
# submitted knowledge base. It runs each evaluation query against the
# KB, producing one line of output for each relation traversed. I
# needed this to work on a machine without a great deal of memory, so
# the KB is processed by iteratively reading the entire KB file, then
# seeking back to the beginning of the file. As each assertion in the
# submission is processed, the program checks whether it can fulfill
# any of the tasks it has outstanding.  There are three kinds of
# tasks:
#  1. FindEntrypointTask: The task knows the docid and character
#     offset of one of the evaluation queries. It is fulfilled if the
#     assertion is a mention, and the characters of the mention include
#     the entrypoint character.
#  2. FillSlotTask: The task knows a KB entity and a predicate
#     name. It is fulfilled if the assertion is of that type, and has the
#     desired entity as subject.
#  3. Entity2NameTask: The task knows an entity and a docid.  It is
#     fulfilled if the assertion is a canonical_mention for that entity
#     in that document.
# Processing starts by creating a task manager and adding a
# FindEntrypointTask for each evaluation query to it. The KB
# submission file is then processed one assertion at a time. If the
# assertion fulfills any task that is active in the task manager, the
# action associated with that task is executed. The actions for the
# three types of task are:
#  - FindEntrypointTask: Create a FillSlotTask for the first relation
#    type in the evaluation query, starting from the entity that matched
#    the entry point character. Add the task to the task manager.
#  - FillSlotTask: Create and add to the task manager an
#    Entity2NameTask, which will find the canonical mention for the
#    object of the matched relation in the document that supports the
#    the relation. If the object is not an entity (e.g., for a
#    relation that takes a string filler), just record it as a slot
#    fill. If there are more hops to be found in the evaluation query,
#    it also creates another FillSlotTask that starts at the object of
#    the assertion that matched, and traverses the next slot in the
#    query.
#  - Entity2NameTask: Add the string found as canonical mention as a
#    slot fill.
# As a courtesy, the task manager keeps track of all the successfully
# matched slot fills.  Once a task has been matched, or, in the case
# of a FillSlotTask, once it has been tested against each assertion in
# the KB submission, it is deleted from the task manager. Thus, once
# the task manager has no more tasks, it terminates and returns the
# slot fills it found.

# Each type of task has a matching routine that tests whether an
# assertion fulfills any of the active tasks of that type. They are
# stored globally here for some reason.
my @retrievers;

##################################################################################### 
# Patterns
##################################################################################### 

# Eliminate comments, ensuring that pound signs in the middle of
# strings are not treated as comment characters
# Here is the original slightly clearer syntax that unfortunately doesn't work with Perl 5.8
# s/^(
# 	(?:[^#"]*+		      # Any number of chars that aren't double quote or pound sign
# 	  (?:"(?:[^"\\]++|\\.)*+")?   # Any number of double quoted strings
# 	)*+			      # The pair of them repeated any number of times
#   )				      # Everything up to here is captured in $1
#   (\#.*)$/$1/x;		      # Pound sign through the end of the line is not included in the replacement
my $comment_pattern = qr/
      ^(
	(?>
	  (?:
	    (?>[^#"]*)		      # Any number of chars that aren't double quote or pound sign
	    (?:"		      # Beginning of double quoted string
	      (?>		      # Start a possessive match of the string body
		(?:(?>[^"\\]+)|\\.)*  # Possessively match any number of non-double quotes or match an escaped char
	      )"		      # Possessively match the above repeatedly, before the closing double quote
	    )?			      # There might or might not be a double quoted string
	  )*			      # The pair of them repeated any number of times
	)			      # Possessively match everything before a pound sign that starts the comment
      )				      # Everything up to here is captured in $1
      (\#.*)$/x;		      # Pound sign through the end of the line is not included in the replacement


##################################################################################### 
# Evaluation Queries
##################################################################################### 

# Read the evaluation queries file, building a hash to represent each,
# and returning a list of all the queries read
sub load_evaluation_queries {
  my ($filename) = @_;
  my $result = [];
  # Slurp the entire text
  open(my $infile, "<:utf8", $filename) or die "Could not open $filename: $!";
  local($/);
  my $text = <$infile>;
  close $infile;
  # Repeatedly look for text that lies between <query> and </query>
  # tags.  Pull out from each the query id, and the list of attributes
  # of that query
  while ($text =~ /<query\s*id="(.*?)">\s*(.*?)\s*<\/query>/gs) {
    my $id = $1;
    my $fields = $2;
    # Now break each query into its separate fields. Those fields
    # appear in key-value order, so we can just capture both values
    # and shove the whole list directly into a hash
    my $struct =  {$fields =~ /^\s*<(.*?)>([^<]*)</gm};
    push(@{$result}, $struct);
    # The Query ID is not a field, but comes from the <query> tag. So
    # we add it to the result explicitly
    $struct->{queryid} = $id;
    # Find all the slots for this query (different evaluation queries
    # may have different numbers of slots to be traversed). Collect
    # them in a list to make it easier for a FillSlotTask to step
    # through the desired slots
    for (my $i = 0; ; $i++) {
      # Slots are all named something like 'slot3'
      my $key = "slot$i";
      if (defined $struct->{$key}) {
	push (@{$struct->{slots}}, $struct->{$key});
      } else {
	last;
      }
    }
  }
  $result;
}

##################################################################################### 
# Task
##################################################################################### 

# This is the base class for the three task types

package Task;

{
  my $next_id = "00000";

  sub new {
    my ($class, $query, $description, $parent) = @_;
    my $self = {QUERY => $query, DESCRIPTION => $description, ID => $next_id++};
#    $self->{HOP} = (defined $parent ? $parent->{HOP} + 1 : 0);
    bless($self, $class);
    $self;
  }

}

##################################################################################### 
# FindEntrypointTask
##################################################################################### 

package FindEntrypointTask;

# Inherit from Task. Use -norequire because Task is defined in this file.
use parent -norequire, 'Task';

sub new {
  my ($class, $query) = @_;
  my $self = $class->SUPER::new($query, "$class($query->{docid}, $query->{offset})");
  bless($self, $class);
  $self;
}

# FindEntrypointTasks are indexed only under docid; matching the entry
# point character to the extent of a mention is done at lookup time
sub add_to_index {
  my ($self, $taskset) = @_;
  push(@{$taskset->{INDICES}{FindEntrypoint}{$self->{QUERY}{docid}}}, $self);
}

# Determine whether this mention includes the evaluation query offset
sub match {
  my ($self, $assertion) = @_;
  $self->{QUERY}{offset} >= $assertion->{start} && $self->{QUERY}{offset} <= $assertion->{end};
}

# Find FindEntrypointTasks in the index that match the given assertion
push(@retrievers, sub  {
                    my ($taskset, $assertion) = @_;
		    return () unless $assertion->{predicate} eq 'mention';
		    grep {$_->match($assertion)} @{$taskset->{INDICES}{FindEntrypoint}{$assertion->{docid}} || []};
                  });

# Remove this FindEntrypointTask from the TaskSet. This only reverses
# add_to_index; indexing by the position of the assertion in the KB
# file is handled by the TaskSet itself
sub remove_from_index {
  my ($self, $taskset) = @_;
  $taskset->{INDICES}{FindEntrypoint}{$self->{QUERY}{docid}} =
    [grep {$_ != $self} @{$taskset->{INDICES}{FindEntrypoint}{$self->{QUERY}{docid}}}];
}

# We've found an assertion that matches this
# FindEntrypointTask. Create a task to fill the first slot of the
# evaluation query, and remove this task from the TaskSet
sub execute {
  my ($self, $taskset, $assertion) = @_;
  my $task = FillSlotTask->new($self->{QUERY}, 'NIL', $assertion->{entity}, 'NIL', @{$self->{QUERY}{slots}});
  $taskset->add_task($task, $assertion->{position});
  # There is only one fill for this task
  $taskset->remove($self);
  $taskset->{STATS}{ENTRYPOINTS_FOUND}++;
}

##################################################################################### 
# FillSlotTask
##################################################################################### 

package FillSlotTask;

use parent -norequire, 'Task';

sub new {
  my ($class, $query, $predecessor, $entity, $predecessor_assertion, @slots) = @_;
  die "Attempt to create $class with no slot list" unless @slots;
  my $self = $class->SUPER::new($query, "$class(" . join(", ", @slots). ") from " . join(", ", caller), $predecessor);
  $self->{ENTITY} = $entity;
  $self->{QUERY} = $query;
  $self->{PREDECESSOR} = $predecessor;
  $self->{PREDECESSOR_ASSERTION} = $predecessor_assertion;
  $self->{SLOT} = shift @slots;
  $self->{SLOTS} = [@slots];
  $self->{HOP} = $predecessor eq 'NIL' ? '00' :  sprintf("%02d", $predecessor->{HOP} + 1);
  bless($self, $class);
  $self;
}

# FillSlotTasks are indexed by entity and predicate
sub add_to_index {
  my ($self, $taskset) = @_;
  my $pred = ($self->{PREDECESSOR} eq 'NIL' ? $self->{QUERY}{queryid} : $self->{PREDECESSOR}{ID});
# unless (defined $pred) {
# print STDERR "PREDECESSOR = $self->{PREDECESSOR}\n";
# print STDERR "queryid = ", $self->{QUERY}{queryid} || 'undef', "\n";
# print STDERR "predid = ", $self->{
  $taskset->{STATS}{"HOP$self->{HOP}_UNIQUE"}++ unless $taskset->{DEDUP}{HOP_UNIQUE}{$self->{HOP}}{$pred}++;
  $taskset->{STATS}{"HOP$self->{HOP}_TOTAL"}++ unless $taskset->{DEDUP}{HOP_TOTAL}{$self->{HOP}}{$self->{ID}}++;
  push(@{$taskset->{INDICES}{FillSlot}{$self->{ENTITY}}{$self->{SLOT}}}, $self);
}

push(@retrievers, sub  {
                    my ($taskset, $assertion) = @_;
		    # Slot names have colons; others (such as mention or type) do not
		    return () unless $assertion->{predicate} =~ /:/;
		    @{$taskset->{INDICES}{FillSlot}{$assertion->{entity}}{$assertion->{predicate}} || []};
                  });

sub remove_from_index {
  my ($self, $taskset) = @_;
  $taskset->{INDICES}{FillSlot}{$self->{ENTITY}}{$self->{SLOT}} =
    [grep {$_ != $self} @{$taskset->{INDICES}{FillSlot}{$self->{ENTITY}}{$self->{SLOT}}}];
}

sub execute {
  my ($self, $taskset, $assertion) = @_;
  # If the object of the assertion begins with a colon, it represents
  # an entity.
  if ($assertion->{object} =~ /^:/) {
    # If there are hops that have not yet been traversed, create a task to find the next one
    if (@{$self->{SLOTS}}) {
      my $task = FillSlotTask->new($self->{QUERY}, $self, $assertion->{object}, $assertion, @{$self->{SLOTS}});
      $taskset->add_task($task, $assertion->{position});
    }
    else {
      $taskset->{STATS}{FINAL_ENTITY_FILLS_FOUND}++;
      $taskset->{STATS}{FINAL_UNIQUE_ENTITY_FILLS_FOUND}++ unless $taskset->{DEDUP}{FINAL_UNIQUE_FILLS}{$self->{ID}}++;
      $taskset->{STATS}{FINAL_TOTAL_FILLS_FOUND}++;
      $taskset->{STATS}{FINAL_TOTAL_UNIQUE_FILLS_FOUND}++ unless $taskset->{DEDUP}{FINAL_UNIQUE_FILLS}{$self->{ID}}++;
    }
    # Whether this is the final hop or not, find the canonical mention
    # for this entity in the supporting document. That allows the
    # query thus far to be treated independently as a shorter query
    my $task = Entity2NameTask->new($self->{QUERY}, $self, $assertion->{object}, $assertion->{docid}, $assertion);
    $taskset->add_task($task, $assertion->{position});
  }
  else {
    # If this is not an entity, it is a regular slot fill; there is no
    # need to look for a canonical mention. Add it to the set of
    # results
    $taskset->add_fill($self, $assertion);
    $taskset->{STATS}{FILLS_FOUND}++;
    $taskset->{STATS}{FINAL_STRING_FILLS_FOUND}++;
    $taskset->{STATS}{FINAL_UNIQUE_STRING_FILLS_FOUND}++ unless $taskset->{DEDUP}{FINAL_UNIQUE_FILLS}{$self->{ID}}++;
    $taskset->{STATS}{FINAL_TOTAL_FILLS_FOUND}++;
    $taskset->{STATS}{FINAL_TOTAL_UNIQUE_FILLS_FOUND}++ unless $taskset->{DEDUP}{FINAL_UNIQUE_FILLS}{$self->{ID}}++;
  }
  # Note that we do not remove this task from the taskset yet; slot fills
  # can be filled multiple times
}

##################################################################################### 
# Entity2NameTask
##################################################################################### 

package Entity2NameTask;

use parent -norequire, 'Task';

sub new {
  my ($class, $query, $parent, $entity, $docid, $parent_assertion) = @_;
  my $self = $class->SUPER::new($query, "$class($entity)", $parent);
  $self->{ENTITY} = $entity;
  $self->{DOCID} = $docid;
  $self->{PARENT} = $parent;
  $self->{PARENT_ASSERTION} = $parent_assertion;
  bless($self, $class);
  $self;
}

# Entity2NameTasks are indexed by entity and docid; they look for the
# appropriate canonical_mention
sub add_to_index {
  my ($self, $taskset) = @_;
  push(@{$taskset->{INDICES}{Entity2Name}{$self->{ENTITY}}{$self->{DOCID}}}, $self);
}

push(@retrievers, sub  {
                    my ($taskset, $assertion) = @_;
		    return () unless $assertion->{predicate} eq 'canonical_mention';
		    @{$taskset->{INDICES}{Entity2Name}{$assertion->{entity}}{$assertion->{docid}} || []};
                  });

sub remove_from_index {
  my ($self, $taskset) = @_;
  $taskset->{INDICES}{Entity2Name}{$self->{ENTITY}}{$self->{DOCID}} =
    [grep {$_ != $self} @{$taskset->{INDICES}{Entity2Name}{$self->{ENTITY}}{$self->{DOCID}}}];
}

sub execute {
  my ($self, $taskset, $assertion) = @_;
  $taskset->add_fill($self->{PARENT}, $self->{PARENT_ASSERTION}, $self, $assertion);
  # There is only one fill for this task, so the task can be deleted immediately
  $taskset->remove($self);
}

##################################################################################### 
# TaskSet
##################################################################################### 

# A TaskSet maintains a set of open tasks. It processes the assertions
# in a KB file one at a time, looking for assertions that fulfill any
# of the tasks. If the end of the KB file is reached, seek is used to
# return to the beginning, and processing continues. If all assertions
# have been matched to a task, the task is deleted.

package TaskSet;

# COUNT is the number of open tasks currently in the TaskSet
# OUTFILE is the file handle to which output should be sent
sub new {
  my ($class, $outfile) = @_;
  my $self = {COUNT => 0, OUTFILE => $outfile};
  bless($self, $class);
  $self;
}

# Keep track of which run is currently being processed
sub set_runid {
  my ($self, $runid) = @_;
  $self->{RUNID} = $runid;
}

# Include a new task in the set of open tasks
sub add_task {
  my ($self, $task, $position) = @_;
  die "You forgot to set the runid for the TaskSet!" unless defined $self->{RUNID};
  # POSITION is the location in the input file at the time the task is
  # added. The next time we return to that place in the file, we can
  # remove this task (since we will have compared all assertions in
  # the file to the task description)
  $task->{POSITION} = $position;
  # Index this task according to the current position. We use a hash
  # rather than an array, because the entries will be sparse
  push(@{$self->{POSITIONS}{$position}}, $task);
  $self->{COUNT}++;
  # We have indexed this task according to its position in the
  # file. Now we also index it according to the particular type of
  # task
  $task->add_to_index($self);
  $self->{STATS}{TOTAL_TASKS}++;
}

# Convert an evaluation query to its initial FindEntryPointTask and
# add it to the set of current tasks
sub add_evaluation_query {
  my ($self, $query) = @_;
  my $initial_task = FindEntrypointTask->new($query);
  $self->add_task($initial_task, 0);
}

# Number of open tasks
sub get_num_active_tasks {
  $_[0]->{COUNT};
}

# Find any open tasks that match the assertion, by invoking each of
# the retrieval routines stored in @retrievers
sub retrieve_tasks {
  my ($self, $assertion) = @_;
  my @result = ();
  foreach my $retriever (@retrievers) {
    push(@result, &{$retriever}($self, $assertion));
  }
  @result;
}

# We're done with this task, either because we tried all the
# assertions, or because its execute routine was satisfied and asked
# for the deletion
sub remove {
  my ($self, $task) = @_;
  $task->remove_from_index($self);
  my $position = $task->{POSITION};
  # remove_at_position might already have removed the task from POSITIONS
  if (defined $self->{POSITIONS}{$position}) {
    $self->{POSITIONS}{$position} = [grep {$_ != $task} @{$self->{POSITIONS}{$position}}];
  }
  $self->{COUNT}--;
}

# Delete all tasks that started at the current position (we have
# looped around and tried each of the assertions in the file, so we
# should stop looking)
sub remove_at_position {
  my ($self, $position) = @_;
  # Delete tasks at this position in bulk (delete returns the deleted tasks)
  my $tasks = delete $self->{POSITIONS}{$position};
  if (defined $tasks) {
    foreach (@{$tasks}) {
      $self->remove($_);
    }
  }
}

# A filler might have tabs or newlines, which can't appear in the
# assessment files.  It is also likely to be surrounded by double
# quotes, which must be removed (along with any escaped characters in
# the string)
sub normalize_filler {
  my ($filler) = @_;
  if ($filler =~ /^"(.*)"$/) {
    $filler = $1;
    $filler =~ s/\\(.)/$1/g;
  }
  $filler =~ s/\s/ /gs;
  $filler;
}

# Here are the columns that LDC is expecting:
#
# Column 1: response ID 
# Column 2: query and hop ID (e.g. CS_ENG_001_00)
# Column 3: parent ID (ID for the filler from the previous hop level now acting as the entity; NIL if 0 hop) 
# Column 4: slot name 
# Column 5: docid that justifies the relation between the parent entity and the slot filler 
# Column 6: [possibly normalized] slot filler string 
# Column 7: start offset of unnormalized filler 
# Column 8: end offset of unnormalized filler 
# Column 9: start offset of justification
# Column 10: end offset of justification 
# + 
# Column 11: judgment for slot filler string (Correct, Redundant w/KB, Inexact, Wrong) 
# Column 12: judgment for justification (Correct, Inexact, Wrong) 
# Column 13: equivalence class for slot filler string if Correct (0 otherwise) 

# Keep track of the filled slots that have been found. We collect them
# in the TaskSet just as a convenience, since all the routines that
# need to return a filled slot already have access to it
sub add_fill {
  my ($self, $task, $assertion, $name_task, $name_assertion) = @_;
  # First, we construct each of the values in Columns 1-10 (the later
  # columns are produced by LDC during assessment)
  my $response_id = "$task->{ID}_$assertion->{position}";
  my $query_and_hop = $task->{QUERY}{queryid} . "_" . $task->{HOP};
  my $parent_id;
  if ($task->{PREDECESSOR} eq 'NIL') {
    $parent_id = 'NIL';
  }
  else {
    # We can't easily store the ID of the predecessor of this entry
    # (i.e., the object of the previous hop). So instead we recreate
    # what it must be (the task ID followed by the position in the KB
    # file)
    $parent_id = "$task->{PREDECESSOR}{ID}_$task->{PREDECESSOR_ASSERTION}{position}";
  }
  my $slot_name = $task->{SLOT};
  my $docid = $assertion->{docid};
  my $filler;
  my $fstart;
  my $fend;
  # This routine either receives a single task and matching assertion
  # (if this a string-valued slot) or two such pairs, one for the
  # final hop in the query and one bearing the canonical_mention for
  # the slot fill.
  if (defined $name_task) {
    $filler = &normalize_filler($name_assertion->{object});
    $fstart = $name_assertion->{start};
    $fend = $name_assertion->{end};
  }
  else {
    $filler = &normalize_filler($assertion->{object});
    $fstart = $assertion->{object_start};
    $fend = $assertion->{object_end};
  }
  my $jstart = $assertion->{predicate_start};
  my $jend = $assertion->{predicate_end};
  # We've calculated all of the necessary values, so print the result
  my $outfile = $self->{OUTFILE};
  print $outfile join("\t", ($response_id,
			     $query_and_hop,
			     $parent_id,
			     $slot_name,
			     $docid,
			     $filler,
			     $fstart,
			     $fend,
			     $jstart,
			     $jend)), "\n";
}

package main;

# For each type of relation that can appear in a KB submission file,
# this table indicates the names and order of the columns
my %predicate2labels = (
  type =>              [qw(entity predicate object confidence)],
  mention =>           [qw(entity predicate object docid start end confidence)],
  canonical_mention => [qw(entity predicate object docid start end confidence)],
  default =>           [qw(entity predicate object docid entity_start entity_end
                           predicate_start predicate_end object_start object_end confidence)],
);

# Convert an assertion to a hash that holds the various fields of the assertion
sub parse_assertion {
  my ($line, $position) = @_;
  # The spec didn't actually require a single tab between entries, so
  # we must ditch any fields that don't contain text
  my (@entries) = grep {/\S/} split(/\t/, $line);
  # Some folks include a confidence on type assertions, others
  # don't. So add a confidence if none is present to make all
  # assertions uniform
  push(@entries, "1.0") unless $entries[-1] =~ /\./;
  # Get the list of expected columns in the assertion statement
  my $predicate = lc $entries[1];
  my $labels = $predicate2labels{$predicate} || $predicate2labels{default};
  # Make sure the number of values provided matches the number
  # expected.  This should always be true if the Validator has been
  # run, but do the check anyway just to make sure
  if (@{$labels} != @entries) {
    print STDERR "\nlabels = (", join(", ", @{$labels}), "); entries = (", join(", ", @entries), ")\n";
    die "Wrong number of arguments for predicate $predicate";
  }
  # Create the hash
  my $result = {map {$labels->[$_] => $entries[$_]} 0..$#{$labels}};
  # Add the description and position fields, which are metadata about
  # the assertion that do not appear in it
  $result->{description} = "$result->{predicate}($result->{entity}, $result->{object}) ---> <<$line>>";
  $result->{position} = $position;
  $result;
}

# Cycle back to the top of the KB file, find the run ID in the first
# line, skip over that line (so that we don't try to interpret the run
# ID as an assertion), and return the run ID (which may well be
# ignored, but we don't care)
sub seek_to_start {
  my ($infile, $taskset) = @_;
  seek($infile, 0, 0) or die "Could not seek to beginning of file";
  my $runid = <$infile>;
  chomp $runid;
  $runid =~ s/$comment_pattern/$1/;
  $runid =~ s/^\s+//;
  $runid =~ s/\s+$//;
  die "No runid found" unless $runid;
  # The initial tasks for the evaluation queries go in at position
  # zero. Make sure we don't forget to delete them once we've read
  # through the file the first time
  $taskset->remove_at_position(0);
  $runid;
}

# Look to fulfill each of the evaluation queries in the current run file
sub process_runfile {
  my ($runfile, $evaluation_queries) = @_;
  # FIXME: Should probably use a switch for this, rather than just
  # dumping the output into the runfile directory
#  my $outputfile = "$runfile.derived10.ldc.tab.txt";
  my $outputfile = "$runfile.ldc.tab.txt";
  open(my $infile, "<:utf8", $runfile) or die "Could not open $runfile: $!";
  open(my $outfile, ">:utf8", $outputfile) or die "Could not open $outputfile: $!";
  # Create a new task set
  my $taskset = TaskSet->new($outfile);
  # Call seek_to_start to skip over the run ID.  seek_to_start wipes
  # out tasks at position 0, so add evaluation queries afterward
  my $runid = &seek_to_start($infile, $taskset);
  $taskset->set_runid($runid);
  foreach my $evaluation_query (@{$evaluation_queries}) {
    $taskset->add_evaluation_query($evaluation_query);
  }
  # Main loop for stepping through the KB file.  We're done when no
  # active tasks remain
  while ($taskset->get_num_active_tasks()) {
    # Get the position of the assertion we're about to read in
    my $tell = tell($infile);
    # Remove any assertions already at this position; we've gone
    # through the entire file with them
    $taskset->remove_at_position($tell);
    # Get the next assertion
    local $_ = <$infile>;
    # If we didn't get anything, we're at the end of the KB file, so
    # seek back to the start and continue
    if (!defined $_) {
      &seek_to_start($infile, $taskset);
      next;
    }
    chomp;
    # KB files may contain comments.  Delete them, but make sure to
    # handle double-quoted strings properly
    s/$comment_pattern/$1/;
    next unless /\S/;
    my $assertion = &parse_assertion($_, $tell);
    # Find any open tasks that are fulfilled by this assertion
    my @tasks = $taskset->retrieve_tasks($assertion);
    # If any are found, run the execute method on them
    foreach my $task (@tasks) {
      $task->execute($taskset, $assertion);
    }
  }
  close $infile;
  close $outfile;
  print STDERR "$taskset->{RUNID}:\n";
  foreach (sort keys %{$taskset->{STATS}}) {
    print STDERR "\t$_ = ", $taskset->{STATS}{$_} || 0, "\n";
  }
}

@ARGV >= 2 or die "Usage: perl $0 <queryfile> <runfiles...>";
my ($query_filename, @runfiles) = @ARGV;
my $queries = &load_evaluation_queries($query_filename);
foreach my $runfile (@runfiles) {
  print STDERR "WARNING: $runfile might not be a Cold Start run file\n" if $runfile =~ /\./ && $runfile !~ /\.valid$/;
  &process_runfile($runfile, $queries);
}

1;