#!/usr/bin/env python # Copyright (c) 2020 Marc Vinyals # This program is free software licensed under the GPL version 3 or later. # https://www.gnu.org/licenses/gpl-3.0.html import sys import itertools import math import argparse import functools import numpy as np import scipy as sp import scipy.spatial.distance import pandas as pd import matplotlib.pyplot as plt import bokeh, bokeh.plotting, bokeh.models, bokeh.embed from mpldatacursor.mpldatacursor import datacursor def is_number(s): try: float(s) except ValueError: return False return True def is_solved(x): if 'verifier result' in x: return (x['result'] == "SAT-VERIFIED" or x['verifier result'] == "UNSAT-VERIFIED") elif 'status' in x: return x['status'] == "complete" def is_solved_pm1(x): return 1 if is_solved(x) else -1 def raw_status(s): if s==10000: return "timeout" if is_number(s): return "complete" return s # PAR = "penalized average runtime" def par_score(factor, timeout, x): return (x['solver time'] if is_solved(x) else factor*timeout) def sort_by_sum(df, ascending): return df.assign(sum=df.sum(axis=1)).sort_values(by='sum', ascending=ascending)#.iloc[:, :-1] def parse_sat_results(stream): df = pd.read_csv(stream, index_col = ["solver","configuration","benchmark"], skipinitialspace=True) df.rename(columns={'checker result': 'verifier result', 'cpu time': 'solver time', 'time': 'solver time'}, inplace=True) return df def distance_matrix(df, make_values, ascending, distance_function): df['new_values'] = df.apply(make_values, axis=1) df = sort_by_sum(df.unstack()['new_values'], ascending) #print(df,file=sys.stderr) df = df.iloc[:,:-1] n = len(df) vectors = [df.iloc[i].to_numpy() for i in range(n)] m = np.empty((n,n)) for i,j in itertools.product(range(n),range(n)): # sum >0 entries m[i][j] = distance_function(vectors[i],vectors[j]) labels = [_ if isinstance(_, str) else (_[0]+(_[1] if _[1] != 'default' else '')) for _ in df.index.values] return pd.DataFrame(data=m, index=labels, columns=labels) def parse_sat_results_2021(stream): df = pd.read_csv(stream, index_col = ["hash"]) df = df.drop(["filename","benchmark","verified-result","claimed-result","vresult"], axis=1, errors='ignore') df = df.transpose() df = df.stack().to_frame('raw') df['status'] = df.apply(lambda x : raw_status(x['raw']), axis=1) df['solver time'] = df.apply(lambda x : float(x['raw']) if x['status']=="complete" else 5000, axis=1) return df def plot_interactive(df): m = df.to_numpy() vmax = 1.5*max(m[:,-1]) plt.imshow(df.to_numpy(), cmap='gist_rainbow',interpolation='none',vmin=0,vmax=vmax) plt.colorbar() datacursor(formatter=lambda **kwargs : "{} better than {} by {:.0f}".format(df.index.values[kwargs['i']], df.index.values[kwargs['j']], kwargs['z'] )) plt.show() def plot_batch(df): vmax = 1.5*max(df.to_numpy()[:,-1]) vmax = 50 if vmax<1000 else 500000 labels=list(df.index.values) df_ = pd.DataFrame(df.stack()).reset_index() df_.columns=['better', 'worse', 'score'] source = bokeh.models.ColumnDataSource(df_) mapper = bokeh.models.LinearColorMapper(palette="Turbo256", low=0, high=vmax) tooltips = [('better', '@better'), ('worse', '@worse'), ('score', '@score')] plot = bokeh.plotting.figure(frame_width=600, frame_height=600, x_range=labels, y_range=list(reversed(labels)), tools="hover", tooltips=tooltips, match_aspect=True) plot.axis.visible = False plot.rect(x="worse", y="better", width=1, height=1, source=source, line_color=None, fill_color=bokeh.transform.transform('score', mapper)) color_bar = bokeh.models.ColorBar(color_mapper=mapper, location=(0,0)) color_bar.formatter = bokeh.models.formatters.NumeralTickFormatter(format='0a') plot.add_layout(color_bar, 'right') return plot #bokeh.plotting.show(plot) def sum_positive(v,w): return sum(filter(lambda x: x>0, w-v)) def count_positive(v,w): return sum(1 for _ in filter(lambda x: x>0, v-w)) def plot_par2(df): timeout = df['solver time'].max() print(timeout,file=sys.stderr) return distance_matrix(df, functools.partial(par_score, 2, timeout), False, sum_positive) def plot_solved_symm(df): return distance_matrix(df, is_solved_pm1, True, sp.spatial.distance.hamming) def plot_solved(df): return distance_matrix(df, is_solved_pm1, True, count_positive) def interactive(): parser = argparse.ArgumentParser() parser.add_argument('score', nargs='?', default='par2') args = parser.parse_args() df = parse_sat_results(sys.stdin) df = {'solved': plot_solved, 'par2': plot_par2}[args.score](df) plot_interactive(df) boilerplate=''' Solver Geometry Heatmaps {script} {explanation} {divs} ''' explanation='''

Solver Geometry Heatmaps

These plots have the goal of identifying solvers that are promising because they do something different, that work when other solvers fail, even if they do not achieve the best ranking in a competition.

More preciely, these are heatmap plots of parwise asymmetric distances between the solvers that participated in recent SAT competitions. Given a metric \\(m\\) such as solved instances or PAR2 score, the asymmetric distance from solver 1 to solver 2 is $$\sum_{i\in\mathrm{instances}} \max(0,m_2(i) - m_1(i))$$, that is the sum of the differences over instances where solver 2 is better than solver 1.

In the plots, solver 1 ("worse") and solver 2 ("better") correspond to columns and rows respectively. This means that we can see by how much a solver beats others by looking at its row, and we can see by how much a solver gets beaten by looking at its column (hover over the plots to see names). Entries are sorted so that the best solver takes the bottom row and the rightmost column.

For instance, in the plots corresponding to 2018, we see how even if the local search solver YalSAT is usually a terrible choice, it is well worth to keep handy in one's toolbox for the not-so-odd case when it beats every other option. We can also see how it is very infrequent for any solver to completely dominate any other solver.

Source: solver_geometry.py.

''' entry='''

SAT {year}

Main track

Solved

{plot1}

PAR2

{plot2}
''' def batch(): plots = [] names = {2016: "Competition", 2017: "Competition", 2018: "Competition", 2019: "Race", 2020: "Competition", 2021: "Competition", 2022: "Competition", 2023: "Competition"} years = names.keys() for year in years: filename = "results{}.csv".format(year) f = open(filename, "r") if year<=2020: df = parse_sat_results(f) else: df = parse_sat_results_2021(f) df1 = plot_solved(df) df2 = plot_par2(df) plots.append(plot_batch(df1)) plots.append(plot_batch(df2)) script, divs = bokeh.embed.components(plots) body = [] idivs=iter(divs) for (year, (plot1,plot2)) in zip(years, zip(idivs,idivs)): body.append(entry.format(year=names[year] + " " + str(year),plot1=plot1,plot2=plot2)) print(boilerplate.format(script=script,explanation=explanation,divs="".join(body))) batch()