【雑記】ヨットのスコアの期待値

本記事はヨットのスコアの期待値に関する記事です．
『世界のアソビ大全51』におけるルールに従います．
スコアの期待値を最大化する戦略を採用した場合， $191.774369188342$ となります．
既約分数で表すと，
\frac{13016843164781134911577847485373669410583272579736395462208071768435842671487294870976987471456569489783095557501482949120609571264927}{67875823134619594730407653673318155404265238604384965333372979438895032016829720226590246561116620399665156181673853449412452286464}
です．
解法

325点が達成される確率と同様のDPで解きます．ボーナス点の進捗を追加で管理します．
メモリを96GB割り当ててなんとか動きました．計算の順序を工夫したりしたほうがいいかもしれません．
from __future__ import annotations

from functools import lru_cache
from fractions import Fraction
from typing import Dict, List, Tuple

# ============================================================
# SixFrac: exact rational number of the form a / 6^b
#   - __add__: align exponents with powers of 6
#   - div6(): divide by 6 via b += 1 only (do not touch numerator)
#
# Expected-score DP also becomes exact a/6^b because all branching is uniform over faces.
# ============================================================

MAX_B: int = 180
POW6: List[int] = [1] * (MAX_B + 1)
for i in range(1, MAX_B + 1):
    POW6[i] = POW6[i - 1] * 6


class SixFrac:
    __slots__ = ("a", "b")

    a: int
    b: int

    def __new__(cls, a: int = 0, b: int = 0) -> SixFrac:
        a_i = int(a)
        b_i = int(b)
        if a_i == 0:
            b_i = 0
        obj = super().__new__(cls)
        obj.a = a_i
        obj.b = b_i
        return obj

    def __hash__(self) -> int:
        return hash((self.a, self.b))

    def __add__(self, other: SixFrac) -> SixFrac:
        if self.a == 0:
            return other
        if other.a == 0:
            return self
        if self.b == other.b:
            return SixFrac(self.a + other.a, self.b)
        if self.b < other.b:
            diff = other.b - self.b
            return SixFrac(self.a * POW6[diff] + other.a, other.b)
        diff = self.b - other.b
        return SixFrac(self.a + other.a * POW6[diff], self.b)

    def div6(self) -> SixFrac:
        if self.a == 0:
            return self
        nb = self.b + 1
        if nb > MAX_B:
            raise ValueError(f"Exponent too large: {nb} > {MAX_B}")
        return SixFrac(self.a, nb)

    def __lt__(self, other: SixFrac) -> bool:
        if self.a == 0 and other.a == 0:
            return False
        if self.b == other.b:
            return self.a < other.a
        if self.b < other.b:
            diff = other.b - self.b
            return self.a * POW6[diff] < other.a
        diff = self.b - other.b
        return self.a < other.a * POW6[diff]

    def __le__(self, other: SixFrac) -> bool:
        return self == other or self < other

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, SixFrac):
            return False
        if self.a == 0 and other.a == 0:
            return True
        if self.b == other.b:
            return self.a == other.a
        if self.b < other.b:
            diff = other.b - self.b
            return self.a * POW6[diff] == other.a
        diff = self.b - other.b
        return self.a == other.a * POW6[diff]

    def to_float(self) -> float:
        return self.a / POW6[self.b]

    def __repr__(self) -> str:
        return f"{self.a}/6^{self.b}"


# ============================================================
# State: counts of faces 1..6 with total <= 5
#
# We index **all** count-vectors (c1..c6) with sum <= 5:
#   number of states = C(11, 5) = 462.
#
# Interpretation:
#   sum(c1..c6) = k  means "k dice are already fixed, and (5-k) dice are still undecided".
#   The transition next_states() fixes one more die (adds 1 to some face).
# ============================================================

DiceCounts = Tuple[int, int, int, int, int, int]
StateId = int

# 0..5 : upper (1..6)
# 6    : Choice
# 7    : Four-of-a-kind
# 8    : Full House
# 9    : Small Straight
# 10   : Large Straight
# 11   : Yacht
N_CATS: int = 12
ALL_MASK: int = (1 << N_CATS) - 1
UPPER_MASK: int = (1 << 6) - 1
BONUS_THRESHOLD: int = 63
BONUS_SCORE: int = 35


class State:
    __slots__ = ("counts",)

    counts: DiceCounts

    def __init__(self, counts: DiceCounts) -> None:
        assert len(counts) == 6
        self.counts = counts

    def __hash__(self) -> int:
        return hash(self.counts)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, State) and self.counts == other.counts

    def total(self) -> int:
        return sum(self.counts)

    def is_5kind(self) -> bool:
        return any(c == 5 for c in self.counts)

    def is_small_straight(self) -> bool:
        present = [1 if c > 0 else 0 for c in self.counts]
        return (
            (present[0] and present[1] and present[2] and present[3]) or
            (present[1] and present[2] and present[3] and present[4]) or
            (present[2] and present[3] and present[4] and present[5])
        )

    def is_big_straight(self) -> bool:
        present = [1 if c > 0 else 0 for c in self.counts]
        return (
            (present[0] and present[1] and present[2] and present[3] and present[4]) or
            (present[1] and present[2] and present[3] and present[4] and present[5])
        )

    def next_states(self) -> List[State]:
        """
        If total < 5, fix one additional die as each face (6 successors).
        If total == 5, there is no successor (already fully fixed).
        """
        res: List[State] = []
        if self.total() < 5:
            for f in range(6):
                n = list(self.counts)
                n[f] += 1
                res.append(State(tuple(n)))  # type: ignore[arg-type]
        return res

    def unfix(self) -> List[State]:
        res: List[State] = []
        for f in range(6):
            if self.counts[f] > 0:
                n = list(self.counts)
                n[f] -= 1
                res.append(State(tuple(n)))
        return res

    # --- scoring for a fully-fixed (total==5) hand ---
    def score_of_cat(self, cat: int) -> int:
        """
        Score obtained if we lock this fully-fixed hand into category 'cat'.
        Only meaningful when total == 5.
        """
        assert self.total() == 5
        c = self.counts
        dice_sum = sum((i + 1) * c[i] for i in range(6))

        if 0 <= cat <= 5:
            # upper: sum of that face
            return (cat + 1) * c[cat]

        if cat == 6:
            # Choice
            return dice_sum

        if cat == 7:
            # Four-of-a-kind: sum of dice if any count>=4 else 0
            return dice_sum if any(x >= 4 for x in c) else 0

        if cat == 8:
            # Full House: (3,2) then score is SUM of dice; else 0
            has3 = any(x == 3 for x in c)
            has2 = any(x == 2 for x in c)
            return dice_sum if (has3 and has2) or self.is_5kind() else 0

        if cat == 9:
            # Small Straight: 15
            return 15 if self.is_small_straight() else 0

        if cat == 10:
            # Large Straight: 30
            return 30 if self.is_big_straight() else 0

        if cat == 11:
            # Yacht: 50
            return 50 if self.is_5kind() else 0

        raise ValueError(f"invalid cat: {cat}")


# ============================================================
# Enumerate all states with sum <= 5 (462 states), and build transitions
# ============================================================

ID_OF: Dict[State, StateId] = {}
STATE_OF: List[State] = []


def gen_compositions_upto(total: int = 5) -> List[State]:
    """
    Generate all 6-tuples of nonnegative integers (c1..c6) with sum <= total.
    Count is C(total+6, 6) = C(11, 5) = 462 when total=5.
    """
    res: List[State] = []
    cur = [0] * 6

    def rec(i: int, rem: int) -> None:
        if i == 6:
            res.append(State(tuple(cur)))  # type: ignore[arg-type]
            return
        for x in range(rem + 1):
            cur[i] = x
            rec(i + 1, rem - x)

    rec(0, total)
    return res


for st in gen_compositions_upto(5):
    ID_OF[st] = len(STATE_OF)
    STATE_OF.append(st)

N_STATES: int = len(STATE_OF)  # 462

NEXT: List[List[StateId]] = [[ID_OF[nst] for nst in st.next_states()] for st in STATE_OF]
UNFIX: List[List[StateId]] = [[ID_OF[nst] for nst in st.unfix()] for st in STATE_OF]

START_ALL: StateId = ID_OF[State((0, 0, 0, 0, 0, 0))]

# SCORE[final_id][cat] (only valid when total==5, otherwise unused)
SCORE: List[List[int]] = [[0] * N_CATS for _ in range(N_STATES)]
for sid, st in enumerate(STATE_OF):
    if not NEXT[sid]:  # total == 5
        for cat in range(N_CATS):
            SCORE[sid][cat] = st.score_of_cat(cat)


# ============================================================
# DP (expected score)
#
# Add bonus progress state:
#   bonus in [0..63], where 63 means ">=63 already secured"
#
# Terminal adds +35 if bonus>=63.
# ============================================================

def clamp_bonus(x: int) -> int:
    return BONUS_THRESHOLD if x >= BONUS_THRESHOLD else x


@lru_cache(maxsize=None)
def dp(mask: int, bonus: int, final_id: StateId, rerolls_left: int) -> SixFrac:
    # All categories filled: add upper bonus if achieved.
    if mask == ALL_MASK:
        return SixFrac(BONUS_SCORE, 0) if bonus >= BONUS_THRESHOLD else SixFrac(0, 0)

    # In this representation, "final" means total == 5, i.e. NEXT[final_id] is empty.
    if NEXT[final_id]:
        raise ValueError("dp called with non-final state (total < 5)")

    if rerolls_left > 0:
        return best_hold(mask, bonus, final_id, rerolls_left)

    # Must choose one unfilled category to maximize expected total future score.
    best = SixFrac(-10**18, 0)  # safe -inf
    for cat in range(N_CATS):
        if (mask >> cat) & 1:
            continue

        p = SCORE[final_id][cat]
        nmask = mask | (1 << cat)

        nbonus = bonus
        if 0 <= cat <= 5:
            nbonus = clamp_bonus(bonus + p)

        val = SixFrac(p, 0) + roll_expect(nmask, nbonus, START_ALL, 2)
        if best < val:
            best = val

    return best


@lru_cache(maxsize=None)
def roll_expect(mask: int, bonus: int, start_id: StateId, next_rerolls_left: int) -> SixFrac:
    # Once all categories are filled, remaining dice do not matter.
    if mask == ALL_MASK:
        return SixFrac(BONUS_SCORE, 0) if bonus >= BONUS_THRESHOLD else SixFrac(0, 0)

    # total == 5 <=> fully fixed roll outcome -> hand off to dp
    if not NEXT[start_id]:
        return dp(mask, bonus, start_id, next_rerolls_left)

    acc = SixFrac(0, 0)
    for nid in NEXT[start_id]:  # length 6 when total < 5
        acc = acc + roll_expect(mask, bonus, nid, next_rerolls_left)
    return acc.div6()


@lru_cache(maxsize=None)
def best_hold(mask: int, bonus: int, cur_id: StateId, rerolls_left: int) -> SixFrac:
    """
    Decide which dice to keep by repeatedly 'unfixing' one die at a time.
    cur_id: current kept-dice state (total<=5).
    rerolls_left: rerolls available *before* performing the next reroll.
    """
    # stop here: reroll the remaining dice once
    best = roll_expect(mask, bonus, cur_id, rerolls_left - 1)

    # or unfix one more die and continue deciding
    for uid in UNFIX[cur_id]:
        val = best_hold(mask, bonus, uid, rerolls_left)
        if best < val:
            best = val
    return best


def expected_score_optimal() -> SixFrac:
    # start with no categories filled, upper sum=0, and 2 rerolls available for the first turn.
    return roll_expect(0, 0, START_ALL, 2)


if __name__ == "__main__":
    ex = expected_score_optimal()
    print("Optimal expected total score as SixFrac a/6^b:")
    print(Fraction(ex.a, 6 ** ex.b))
    print(f"approx = {ex.to_float():.12f}")