#!/usr/bin/env python

# Copyright (c) 2020 Marc Vinyals
# This program is free software licensed under the GPL version 3 or later.
# https://www.gnu.org/licenses/gpl-3.0.html

import sys
import itertools
import math
import argparse
import functools
import numpy as np
import scipy as sp
import scipy.spatial.distance
import pandas as pd
import matplotlib.pyplot as plt
import bokeh, bokeh.plotting, bokeh.models, bokeh.embed
from mpldatacursor.mpldatacursor import datacursor

def is_number(s):
    try:
        float(s)
    except ValueError:
        return False
    return True

def is_solved(x):
    if 'verifier result' in x:
        return (x['result'] == "SAT-VERIFIED"
                or x['verifier result'] == "UNSAT-VERIFIED")
    elif 'status' in x:
        return x['status'] == "complete"

def is_solved_pm1(x):
    return 1 if is_solved(x) else -1

def raw_status(s):
    if s==10000:
        return "timeout"
    if is_number(s):
        return "complete"
    return s

# PAR = "penalized average runtime"
def par_score(factor, timeout, x):
    return (x['solver time'] if is_solved(x)
            else factor*timeout)

def sort_by_sum(df, ascending):
    return df.assign(sum=df.sum(axis=1)).sort_values(by='sum', ascending=ascending)#.iloc[:, :-1]

def parse_sat_results(stream):
    df = pd.read_csv(stream, index_col = ["solver","configuration","benchmark"], skipinitialspace=True)
    df.rename(columns={'checker result': 'verifier result', 'cpu time': 'solver time', 'time': 'solver time'}, inplace=True)
    return df

def distance_matrix(df, make_values, ascending, distance_function):
    df['new_values'] = df.apply(make_values, axis=1)
    df = sort_by_sum(df.unstack()['new_values'], ascending)
    #print(df,file=sys.stderr)
    df = df.iloc[:,:-1]
    n = len(df)
    vectors = [df.iloc[i].to_numpy() for i in range(n)]

    m = np.empty((n,n))
    for i,j in itertools.product(range(n),range(n)):
        # sum >0 entries
        m[i][j] = distance_function(vectors[i],vectors[j])

    labels = [_ if isinstance(_, str) else (_[0]+(_[1] if _[1] != 'default' else '')) for _ in df.index.values]
    return pd.DataFrame(data=m, index=labels, columns=labels)

def parse_sat_results_2021(stream):
    df = pd.read_csv(stream, index_col = ["hash"])
    df = df.drop(["filename","benchmark","verified-result","claimed-result","vresult"], axis=1, errors='ignore')
    df = df.transpose()
    df = df.stack().to_frame('raw')
    df['status'] = df.apply(lambda x : raw_status(x['raw']), axis=1)
    df['solver time'] = df.apply(lambda x : float(x['raw']) if x['status']=="complete" else 5000, axis=1)
    return df

def plot_interactive(df):
    m = df.to_numpy()
    vmax = 1.5*max(m[:,-1])
    plt.imshow(df.to_numpy(), cmap='gist_rainbow',interpolation='none',vmin=0,vmax=vmax)
    plt.colorbar()
    datacursor(formatter=lambda **kwargs :
               "{} better than {} by {:.0f}".format(df.index.values[kwargs['i']],
                                                    df.index.values[kwargs['j']],
                                                    kwargs['z']
               ))
    plt.show()


def plot_batch(df):
    vmax = 1.5*max(df.to_numpy()[:,-1])
    vmax = 50 if vmax<1000 else 500000
    labels=list(df.index.values)
    df_ = pd.DataFrame(df.stack()).reset_index()
    df_.columns=['better', 'worse', 'score']
    source = bokeh.models.ColumnDataSource(df_)

    mapper = bokeh.models.LinearColorMapper(palette="Turbo256", low=0, high=vmax)
    tooltips = [('better', '@better'), ('worse', '@worse'), ('score', '@score')]

    plot = bokeh.plotting.figure(frame_width=600, frame_height=600,
                                 x_range=labels, y_range=list(reversed(labels)),
                                 tools="hover", tooltips=tooltips, match_aspect=True)
    plot.axis.visible = False

    plot.rect(x="worse", y="better", width=1, height=1, source=source,
       line_color=None, fill_color=bokeh.transform.transform('score', mapper))

    color_bar = bokeh.models.ColorBar(color_mapper=mapper, location=(0,0))
    color_bar.formatter = bokeh.models.formatters.NumeralTickFormatter(format='0a')
    plot.add_layout(color_bar, 'right')

    return plot
    #bokeh.plotting.show(plot)

def sum_positive(v,w):
    return sum(filter(lambda x: x>0, w-v))

def count_positive(v,w):
    return sum(1 for _ in filter(lambda x: x>0, v-w))

def plot_par2(df):
    timeout = df['solver time'].max()
    print(timeout,file=sys.stderr)
    return distance_matrix(df, functools.partial(par_score, 2, timeout), False, sum_positive)

def plot_solved_symm(df):
    return distance_matrix(df, is_solved_pm1, True, sp.spatial.distance.hamming)

def plot_solved(df):
    return distance_matrix(df, is_solved_pm1, True, count_positive)

def interactive():
    parser = argparse.ArgumentParser()
    parser.add_argument('score', nargs='?', default='par2')
    args = parser.parse_args()
    df = parse_sat_results(sys.stdin)
    df = {'solved': plot_solved, 'par2': plot_par2}[args.score](df)
    plot_interactive(df)

boilerplate='''<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Solver Geometry Heatmaps</title>
        <meta name="viewport" content="width=700, initial-scale=1">
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-2.4.2.min.js"></script>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.css" integrity="sha384-AfEj0r4/OFrOo5t7NnNe46zW/tFgW6x/bCJG8FqQCEo3+Aro6EYUG4+cU+KJWu/X" crossorigin="anonymous">
    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.js" integrity="sha384-g7c+Jr9ZivxKLnZTDUhnkOnsh30B4H0rpLUpJ4jAIKs4fnJI+sEnkvrMWph2EDg4" crossorigin="anonymous"></script>
    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/contrib/auto-render.min.js" integrity="sha384-mll67QQFJfxn0IYznZYonOWZ644AWYC+Pt2cHqMaRhXVrursRwvLnLaebdGIlYNa" crossorigin="anonymous"
        onload="renderMathInElement(document.body);"></script>
        {script}
        <style>
            .row-wrapper {{
              display: flex;
              flex-wrap: wrap;
              justify-content: space-evenly;
            }}
            p {{
              max-width: 50em;
              text-align: justify;
            }}
            body {{
              padding-bottom: 700px;
            }}
        </style>
    </head>
    <body>
{explanation}
{divs}
    </body>
</html>
'''
explanation='''
<h1>Solver Geometry Heatmaps</h1>
<p>
  These plots have the goal of identifying solvers that are promising because they do something different, that work when other solvers fail, even if they do not achieve the best ranking in a competition.
</p>
<p>
  More preciely, these are heatmap plots of parwise asymmetric distances between the solvers that participated in recent SAT competitions. Given a metric \\(m\\) such as solved instances or PAR2 score, the asymmetric distance from solver 1 to solver 2 is $$\sum_{i\in\mathrm{instances}} \max(0,m_2(i) - m_1(i))$$, that is the sum of the differences over instances where solver 2 is better than solver 1.
</p>
<p>
  In the plots, solver 1 ("worse") and solver 2 ("better") correspond to columns and rows respectively. This means that we can see by how much a solver beats others by looking at its row, and we can see by how much a solver gets beaten by looking at its column (hover over the plots to see names). Entries are sorted so that the best solver takes the bottom row and the rightmost column.
</p>
<p>
  For instance, in the plots corresponding to 2018, we see how even if the local search solver YalSAT is usually a terrible choice, it is well worth to keep handy in one's toolbox for the not-so-odd case when it beats every other option. We can also see how it is very infrequent for any solver to completely dominate any other solver.
</p>
<p>
Source: <a href="solver_geometry.py">solver_geometry.py</a>.
</p>
'''
entry='''<h2>SAT {year}</h2>
Main track
<div class="row-wrapper">
  <div>
    <h3>Solved</h3>
    {plot1}
  </div>
  <div>
    <h3>PAR2</h3>
    {plot2}
  </div>
</div>
'''
def batch():
    plots = []
    names = {2016: "Competition", 2017: "Competition", 2018: "Competition", 2019: "Race", 2020: "Competition", 2021: "Competition", 2022: "Competition", 2023: "Competition"}
    years = names.keys()
    for year in years:
        filename = "results{}.csv".format(year)
        f = open(filename, "r")
        if year<=2020:
            df = parse_sat_results(f)
        else:
            df = parse_sat_results_2021(f)
        df1 = plot_solved(df)
        df2 = plot_par2(df)
        plots.append(plot_batch(df1))
        plots.append(plot_batch(df2))
    script, divs = bokeh.embed.components(plots)
    body = []
    idivs=iter(divs)
    for (year, (plot1,plot2)) in zip(years, zip(idivs,idivs)):
        body.append(entry.format(year=names[year] + " " + str(year),plot1=plot1,plot2=plot2))
    print(boilerplate.format(script=script,explanation=explanation,divs="".join(body)))

batch()