Dealing¶

Sections covered:

3.3: Impact of detours on the solver’s total length
3.4: Comparison of B&C and HGS solutions
3.5: Dealing with crossings from HGS-CVRP

This is used in the paper Flexible cable routing framework for wind farm collection system optimization.

Preamble¶

[1]:

import urllib.request
import lzma
import shutil
import os
from pathlib import Path
import numpy as np
import polars as pl

[2]:

from optiwindnet.db import database_connection, G_from_routeset, NodeSet, RouteSet, Method
from optiwindnet.svg import svgplot

[3]:

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

%config InlineBackend.figure_formats = ['svg']
plt.rcParams['svg.fonttype'] = 'none'

style_file = Path('mplstyles/jupyter_dark.mplstyle')
style_file.is_file() and plt.style.use(style_file)
pub_style_file = Path('mplstyles/pdf_1col.mplstyle')
pub_style = pub_style_file if pub_style_file.is_file() else 'default'

Setup¶

[4]:

def fetch_zenodo_file(record, file_name, target_path: str = '.'):
    url = f'https://zenodo.org/records/{record}/files/{file_name}.xz?download=1'
    try:
        with open(os.path.join(target_path, file_name), 'xb') as f_out:
            with urllib.request.urlopen(url) as response:
                with lzma.open(response) as decompressor:
                    shutil.copyfileobj(decompressor, f_out)
    except FileExistsError:
        return

[5]:

record = '19798734'
DB_FILE = 'paperdb.v4.sqlite'

[6]:

fetch_zenodo_file(record, DB_FILE)

Selecting actual routesets¶

[7]:

with database_connection(DB_FILE):
    Ns_q = NodeSet.select().where(~NodeSet.name.startswith('!'))
Ns_q.count()

[7]:

[8]:

entries = []
for ns in Ns_q:
    for rs in ns.routesets:
        entries.append((
            ns.name,
            rs.id,
            ns.T,
            rs.capacity,
            rs.detextra,
            rs.length,
            rs.creator,
            rs.misc.get('gap', None),
        ))
len(entries)

[8]:

Make DataFrame¶

[9]:

schema_actual = dict(
    name=str,
    id=int,
    T=int,
    capacity=int,
    detextra=float,
    length=float,
    creator=str,
    gap=float,
)

[10]:

df_actual = pl.DataFrame(entries, schema=schema_actual, orient='row')

[11]:

df_actual

[11]:

shape: (1_464, 8)

name	id	T	capacity	detextra	length	creator	gap
str	i64	i64	i64	f64	f64	str	f64
"Gode Wind 1"	1	55	5	0.000847	63626.868227	"MILP.pyomo.cplex"	0.004949
"Gode Wind 1"	2	55	7	null	55566.023496	"MILP.pyomo.cplex"	0.004982
"Gode Wind 1"	3	55	6	null	59109.208596	"MILP.pyomo.cplex"	0.005
"Gode Wind 1"	4	55	9	null	51675.248009	"MILP.pyomo.cplex"	0.005
"Gode Wind 1"	5	55	8	null	53268.425754	"MILP.pyomo.cplex"	0.004997
…	…	…	…	…	…	…	…
"Cazzaro-2022"	21630	50	10	0.0	7.876699	"MILP.pyomo.cplex"	0.00496
"Cazzaro-2022"	21631	50	11	2.2204e-16	7.71745	"MILP.pyomo.cplex"	0.004993
"Cazzaro-2022"	21632	50	8	0.0	8.425937	"MILP.pyomo.cplex"	0.004999
"Cazzaro-2022"	21633	50	12	-1.1102e-16	7.630594	"MILP.pyomo.cplex"	0.004999
"Cazzaro-2022"	22546	50	4	0.102661	11.920841	"MILP.pyomo.gurobi"	0.000732

[12]:

df_actual['name'].value_counts()

[12]:

shape: (69, 2)

name	count
str	u32
"Humber Gateway"	22
"Gemini 1"	22
"Gemini 2"	22
"Thanet"	22
"Taylor-2023.1_OSS"	22
…	…
"CECEP Yangjiang Nanpeng Island"	22
"SPIC Binhai North H2"	22
"Nordsee One"	22
"Rudong H8"	22
"Meerwind"	22

[13]:

df_actual['gap'].max()

[13]:

0.019817266284630097

[14]:

df_actual['detextra'].max()

[14]:

0.12263549531666174

group ⟨heuri, exact⟩ pairs¶

[15]:

pairs_actual = []
for ns in Ns_q:
    dfn = df_actual.filter(pl.col('name') == ns.name)
    for k in range(dfn['capacity'].min(), dfn['capacity'].max() + 1):
        dfk = dfn.filter(pl.col('capacity') == k)
        if dfk.height >= 2:
            mask = pl.col('creator') == 'baselines.hgs'
            heuri = dfk.filter(mask).bottom_k(1, by='length')[0]
            exact = dfk.filter(~mask).bottom_k(1, by='gap')[0]
            if exact['gap'].item() <= 0.02:
                pairs_actual.append((RouteSet.get_by_id(heuri['id'].item()),
                                     RouteSet.get_by_id(exact['id'].item())))
len(pairs_actual)

[15]:

[16]:

diffs = []
for rs_hgs, rs_bnc in pairs_actual:
    diffs.append(rs_hgs.length/rs_bnc.length - 1)

[17]:

for (heuri, exact), diff in zip(pairs_actual, diffs):
    if diff >= 0.07:
        print(heuri.id, exact.id, exact.nodes.name)

21311 22196 Sandbank
21545 21925 Moray West.1_OSS

[18]:

small_diffs = []
for (heuri, exact), diff in zip(pairs_actual, diffs):
    if diff <= 0.01:
        small_diffs.append((heuri.id, exact.id))
len(small_diffs)

[18]:

[19]:

rs = RouteSet.get_by_id(21311)
print(rs.detextra)
svgplot(G_from_routeset(rs))

0.07397519014347398

[19]:

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_26_1.svg

[20]:

rs = RouteSet.get_by_id(22196)
print(rs.detextra)
svgplot(G_from_routeset(rs))

0.0013995437575948788

[20]:

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_27_1.svg

[21]:

rs = RouteSet.get_by_id(21545)
print(rs.detextra)
svgplot(G_from_routeset(rs))

0.02038470064164777

[21]:

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_28_1.svg

[22]:

rs = RouteSet.get_by_id(21925)
print(rs.detextra)
svgplot(G_from_routeset(rs))

None

[22]:

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_29_1.svg

[23]:

plt.hist(diffs, bins=21);

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_30_0.svg

HGS optimizer¶

Select HGS-CVRP entries¶

[24]:

method_hgs = Method.select().where(
    Method.funname == 'hgs_cvrp',
    Method.options['timeLimit'] == 120
)
method_hgs.count()

[24]:

[25]:

for m in method_hgs:
    print(m.funname, m.options)

hgs_cvrp {'complete': False, 'lambda_': 40, 'mu': 25, 'nbClose': 5, 'nbElite': 4, 'nbGranular': 20, 'nbIter': 20000, 'scale': 10000.0, 'seed': 0, 'targetFeasible': 0.2, 'timeLimit': 120.0, 'useSwapStar': True}
hgs_cvrp {'complete': False, 'lambda_': 40, 'mu': 25, 'nbClose': 5, 'nbElite': 4, 'nbGranular': 20, 'nbIter': 20000, 'seed': 20241113, 'targetFeasible': 0.2, 'timeLimit': 120, 'useSwapStar': True}

synthetic instances (a single capacity per NodeSet)¶

[26]:

pairs_synth = []

[27]:

# hgs instances that were first solved by cplex
hgs_sub2 = (RouteSet.select()
    .join(NodeSet)
    .switch(RouteSet)
    .where(
        RouteSet.method.in_([m.digest for m in method_hgs]),
        RouteSet.misc['solver_details'].is_null(False),
        RouteSet.misc['solver_details']['incumbent_id'].is_null(False),
        NodeSet.name.startswith('!'),
        NodeSet.T >= 50,
    ))
# keep only instances for which the exact solution is within 2% gap
for rs_hgs in hgs_sub2:
    rs_bnc = RouteSet.get_by_id(rs_hgs.misc['solver_details']['incumbent_id'])
    if rs_bnc.misc['gap'] <= 0.02:
        pairs_synth.append((rs_hgs, rs_bnc))
hgs_sub2.count(), len(pairs_synth)

[27]:

(2346, 2346)

[28]:

# hgs instances that were solved by cplex afterward
hgs_sub2_ = (RouteSet.select()
    .join(NodeSet)
    .switch(RouteSet)
    .where(
        RouteSet.method.in_([m.digest for m in method_hgs]),
        RouteSet.misc['solver_details'].is_null(),
        NodeSet.name.startswith('!'),
        NodeSet.T >= 50,
    ))
# keep only instances for which the exact solution is within 2% gap
for rs_hgs in hgs_sub2_:
    if rs_hgs.nodes.routesets.count() == 1:
        # no MILP counterpart
        continue
    cplex = gurobi = None
    for rs2 in rs_hgs.nodes.routesets:
        if rs2.id == rs_hgs.id:
            continue
        if rs2.creator == 'MILP.pyomo.cplex':
            cplex = rs2
        elif rs2.creator == 'MILP.pyomo.gurobi':
            gurobi = rs2
    if gurobi and gurobi.misc['gap'] <= 0.02:
        pairs_synth.append((rs_hgs, gurobi))
    elif cplex and cplex.misc['gap'] <= 0.02:
        pairs_synth.append((rs_hgs, cplex))
hgs_sub2_.count(), len(pairs_synth)

[28]:

(4033, 6379)

[29]:

len(pairs_actual) + len(pairs_synth)

[29]:

Check if there are NodeSets counted multiple times¶

[30]:

len(pairs_synth)

[30]:

[31]:

once = set()
twice = set()
for rs_hgs, rs_bnc in pairs_synth:
    assert rs_hgs.nodes_id == rs_bnc.nodes_id, (rs_hgs.nodes.name, rs_bnc.nodes.name)
    if rs_hgs.nodes_id in once:
        if rs_hgs.nodes_id in twice:
            print(f'more than twice: {rs_hgs.nodes.name}')
        twice.add(rs_hgs.nodes_id)
    else:
        once.add(rs_hgs.nodes_id)
len(once), len(twice)

[31]:

(6379, 0)

Make DataFrame¶

[32]:

schema_hgs = dict(
    id=int,
    synthetic=bool,
    T=int,
    capacity=int,
    detextra=float,
    length=float,
    # incumbent=float,
    # incumbent_bound=float,
    exact_bound=float,
    # incumbent_id=int,
)

[33]:

data = []
for rs_hgs, rs_bnc in pairs_actual + pairs_synth:
    data.append(dict(
        id=rs_hgs.id,
        synthetic=rs_hgs.nodes.name[0] == '!',
        T=rs_hgs.T,
        capacity=rs_hgs.capacity,
        detextra=rs_hgs.detextra,
        length=rs_hgs.length,
        exact_bound=rs_bnc.misc['bound'],
    ))
    # print(details['incumbent_id'], end=' ')
df_hgs = pl.DataFrame(data, schema=schema_hgs)

[34]:

df_hgs.columns

[34]:

['id', 'synthetic', 'T', 'capacity', 'detextra', 'length', 'exact_bound']

[35]:

df_hgs['synthetic'].value_counts()

[35]:

shape: (2, 2)

synthetic	count
bool	u32
true	6379
false	732

[36]:

df_hgs['detextra'].describe()

[36]:

shape: (9, 2)

statistic	value
str	f64
"count"	5129.0
"null_count"	1982.0
"mean"	0.003079
"std"	0.005495
"min"	-0.018452
"25%"	0.000774
"50%"	0.001801
"75%"	0.003515
"max"	0.118956

[37]:

detextra_hgs = np.sort(np.nan_to_num(np.clip(df_hgs['detextra'], 0., None), nan=0.))

[38]:

# Negative detextra in hgs is OK.
# It means some subtree roots chosen by PathFinder were closer to root
# than the path's head (since PathFinder relaxes the path-subtree constraint)
df_hgs.filter(pl.col('detextra') < -1e-3)

[38]:

shape: (35, 7)

id	synthetic	T	capacity	detextra	length	exact_bound
i64	bool	i64	i64	f64	f64	f64
21006	false	60	10	-0.002975	51296.546654	49611.477292
21209	false	55	2	-0.003444	182175.265645	182804.81475
21208	false	55	4	-0.001266	99909.593562	99531.718807
21556	false	119	2	-0.017189	405793.043417	412587.411775
21557	false	119	3	-0.007867	291621.821448	292531.694887
…	…	…	…	…	…	…
16186	true	70	10	-0.009745	7.983239	7.73576
16857	true	50	4	-0.003896	8.106149	8.051141
17139	true	133	10	-0.002333	12.255932	11.63741
17696	true	68	8	-0.012723	8.916369	8.541159
17764	true	57	4	-0.001816	10.979295	10.874424

B&C solver¶

Make DataFrame¶

[39]:

schema_exact = dict(
    id=int,
    synthetic=bool,
    T=int,
    capacity=int,
    detextra=float,
    length=float,
    bound=float,
)

[40]:

data = []
for rs_hgs, rs_bnc in pairs_actual + pairs_synth:
    data.append(dict(
        id=rs_bnc.id,
        synthetic=rs_bnc.nodes.name[0] == '!',
        T=rs_bnc.T,
        capacity=rs_bnc.capacity,
        detextra=rs_bnc.detextra,
        length=rs_bnc.length,
        bound=rs_bnc.misc['bound'],
    ))
df_BnC = pl.DataFrame(data, schema=schema_exact)

[41]:

df_BnC.height

[41]:

[42]:

df_BnC['detextra'].describe()

[42]:

shape: (9, 2)

statistic	value
str	f64
"count"	4949.0
"null_count"	2162.0
"mean"	0.002359
"std"	0.004466
"min"	-0.009212
"25%"	0.000637
"50%"	0.001591
"75%"	0.003006
"max"	0.122635

[43]:

detextra_BnC = np.sort(np.nan_to_num(np.clip(df_BnC['detextra'], 0., None), nan=0.))

3.3 Impact of detours on the solver’s total length¶

[44]:

with plt.style.context(pub_style):
    data = 100*detextra_BnC

    fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios=[1, 5], figsize=(3.5, 2),
                                   facecolor='w', sharex=True)
    # fig.subplots_adjust(hspace=0.05)
    ax1.spines.bottom.set_visible(False)
    ax1.xaxis.tick_top()
    ax1.tick_params(labeltop=False)
    num_data = len(data)
    below1 = data <= 1
    below2 = data <= 2
    bins = np.linspace(0, 3, num=76)
    # bins = None

    # top y_axis segment
    H1, X1, art1_ = ax1.hist(data[below1], bins = bins, color='tab:blue',
                             histtype='stepfilled', alpha=0.3)
    H2, X2, art2_ = ax1.hist(data[below2], bins = bins, edgecolor='tab:blue',
                             histtype='stepfilled', hatch=r'\\\\', facecolor='none')
    H, X, art_ = ax1.hist(data, bins = bins, color='tab:blue', histtype='step')

    # main y_axis segment
    H1, X1, art1_ = ax2.hist(data[below1], bins = bins, color='tab:blue',
                             histtype='stepfilled', alpha=0.3)
    H2, X2, art2_ = ax2.hist(data[below2], bins = bins, edgecolor='tab:blue',
                             histtype='stepfilled', hatch=r'\\\\', facecolor='none')
    H, X, art_ = ax2.hist(data, bins = bins, color='tab:blue', histtype='step')

    ax1.set_ylim(3100, 3200)  # outliers only
    ax2.set_ylim(0, 600)  # most of the data

    C1, C2 = H1.sum(), H2.sum()
    leg = ax2.legend(  # the empty Line2D is a hack to get a line instead of a patch
        [Line2D([], []), art2_[0], art1_[0]],
        ['Instance count',
         f'≤ 2%: {100*C2/num_data:.1f}%',
         f'≤ 1%: {100*C1/num_data:.1f}%'],
        title=f'B&C instances: {num_data}'
    )
    ax2.set_ylabel('Count')
    ax2.set_xlabel('Relative increase (%)')
    leg.legend_handles[0] = Line2D([], [])
    ax1.figure.savefig('fig_detextra_BnC.pdf', transparent=False)
    pass

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_59_0.svg

[45]:

with plt.style.context(pub_style):
# with plt.style.context('pub_paperfarm'):
# if True:
    data = 100*detextra_hgs

    fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios=[1, 5], figsize=(3.5, 2),
                                   facecolor='w', sharex=True)
    # fig.subplots_adjust(hspace=0.05)
    ax1.spines.bottom.set_visible(False)
    ax1.xaxis.tick_top()
    ax1.tick_params(labeltop=False)
    num_data = len(data)
    below1 = data <= 1
    below2 = data <= 2
    bins = np.linspace(0, 3, num=76)
    # bins = None

    # top y_axis segment
    H1, X1, art1_ = ax1.hist(data[below1], bins = bins, color='tab:blue',
                             histtype='stepfilled', alpha=0.3)
    H2, X2, art2_ = ax1.hist(data[below2], bins = bins, edgecolor='tab:blue',
                             histtype='stepfilled', hatch=r'\\\\', facecolor='none')
    H, X, art_ = ax1.hist(data, bins = bins, color='tab:blue', histtype='step')

    # main y_axis segment
    H1, X1, art1_ = ax2.hist(data[below1], bins = bins, color='tab:blue',
                             histtype='stepfilled', alpha=0.3)
    H2, X2, art2_ = ax2.hist(data[below2], bins = bins, edgecolor='tab:blue',
                             histtype='stepfilled', hatch=r'\\\\', facecolor='none')
    H, X, art_ = ax2.hist(data, bins = bins, color='tab:blue', histtype='step')

    ax1.set_ylim(2800, 2900)  # outliers only
    ax2.set_ylim(0, 600)  # most of the data

    C1, C2 = H1.sum(), H2.sum()
    leg = ax2.legend(  # the empty Line2D is a hack to get a line instead of a patch
        [Line2D([], []), art2_[0], art1_[0]],
        ['Instance count',
         f'≤ 2%: {100*C2/num_data:.1f}%',
         f'≤ 1%: {100*C1/num_data:.1f}%'],
        title=f'HGS instances: {num_data}'
    )
    ax2.set_ylabel('Count')
    ax2.set_xlabel('Relative increase (%)')
    leg.legend_handles[0] = Line2D([], [])
    ax1.figure.savefig('fig_detextra_HGS.pdf', transparent=False)
    pass

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_60_0.svg

3.4 Comparison of B&C and HGS solutions¶

[46]:

with plt.style.context(pub_style):
    data_HGSvsBnC_gap = 1 - df_BnC['length']/df_hgs['length']
    data = 100*data_HGSvsBnC_gap.to_numpy()

    fig, ax = plt.subplots(facecolor='w', figsize=(3.5, 2))
    num_data = len(data)
    below1 = data <= 1
    below2 = data <= 2
    elsewhere = data > 2
    bins = np.linspace(-2, 8, num=201)

    # main y_axis segment
    H1, X1, art1_ = ax.hist(data[below1], bins = bins, color='tab:blue',
                             histtype='stepfilled', alpha=0.3)
    H2, X2, art2_ = ax.hist(data[below2], bins = bins, edgecolor='tab:blue',
                             histtype='stepfilled', hatch=r'\\\\', facecolor='none')
    H, X, art_ = ax.hist(data, bins = bins, color='tab:blue', histtype='step')

    C1, C2 = H1.sum(), H2.sum()
    leg = ax.legend(  # the empty Line2D is a hack to get a line instead of a patch
        [Line2D([], []), art2_[0], art1_[0]],
        ['HGS to B&C gap',
         f'≤ 2%: {100*C2/num_data:.1f}%',
         f'≤ 1%: {100*C1/num_data:.1f}%'],
        title=f'Instances: {num_data}'
    )
    ax.set_ylabel('Count')
    ax.set_xlabel('Relative difference (%)')
    leg.legend_handles[0] = Line2D([], [])
    ax.set_xlim(-2, 8)
    ax.figure.savefig('fig_HGS_to_BnC_gap.pdf', transparent=False)
    pass

../_images/notebooks_43-Paper_3.3_detours.4_HGSvsBnC.5_crossings_62_0.svg

3.5 Dealing with crossings from HGS-CVRP¶

Select HGS-CVRP entries¶

[47]:

df_hgs.height

[47]:

[48]:

method_hgs = Method.select().where(
    Method.funname == 'hgs_cvrp',
    Method.options['timeLimit'] == 120
)

[49]:

method_hgs.count()

[49]:

[50]:

hgs_all = RouteSet.select().where(
    RouteSet.method.in_([m.digest for m in method_hgs])
)
hgs_all.count()

[50]:

[51]:

hgs_invalid = 0  # routeset.valid removed from the schema
hgs_invalid

[51]:

[52]:

# just the instances repaired after the first hgs run
hgs_repaired = hgs_all.where(
    RouteSet.misc['repaired'].is_null(False),
    RouteSet.misc['hgs_reruns'].is_null(),
)
hgs_repaired.count()

[52]:

[53]:

hgs_reruns = hgs_all.where(
    RouteSet.misc['hgs_reruns'].is_null(False),
)
hgs_reruns.count()

[53]:

Statistics¶

[54]:

hgs_all = RouteSet.select().where(
    RouteSet.creator == 'baselines.hgs'
)
hgs_all.count()

[54]:

[55]:

# just the instances repaired after the first hgs run
hgs_repaired = hgs_all.where(
    RouteSet.misc['repaired'].is_null(False),
    RouteSet.misc['hgs_reruns'].is_null(),
)
hgs_repaired.count()

[55]:

[56]:

hgs_reruns = hgs_all.where(
    RouteSet.misc['hgs_reruns'].is_null(False),
)
hgs_reruns.count()

[56]:

[57]:

(hgs_repaired.count() + hgs_reruns.count())/hgs_all.count()

[57]:

0.06046969483898186

[58]:

hgs_repaired.count()/hgs_all.count()

[58]:

0.04668822950358599

[59]:

hgs_reruns.count()/hgs_all.count()

[59]:

0.013781465335395865

Examining re-runs¶

[60]:

subcounts = []
for reruns in range(1, 10):
    count = hgs_reruns.where(
        RouteSet.misc['hgs_reruns'] == reruns
    ).count()
    subcounts.append(count)
    print(reruns, count, count/hgs_all.count())

1 81 0.011390802981296582
2 12 0.0016875263675994938
3 2 0.0002812543945999156
4 1 0.0001406271972999578
5 2 0.0002812543945999156
6 0 0.0
7 0 0.0
8 0 0.0
9 0 0.0

[61]:

total = sum(subcounts)
for reruns, count in zip(range(1, 10), subcounts):
    print(reruns, f'{100*count/total:.1f}%')

1 82.7%
2 12.2%
3 2.0%
4 1.0%
5 2.0%
6 0.0%
7 0.0%
8 0.0%
9 0.0%