import glob
import math
import os
import shutil
import subprocess
import time
import matplotlib
matplotlib.use('Agg')
import imageio
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from hyperopt import fmin, hp, tpe
import torch_optimizer as optim
sns.set_theme(style="whitegrid")
def rosenbrock(tensor):
    """Evaluate the 2-D Rosenbrock function ``(1 - x)^2 + 100 * (y - x^2)^2``.

    Args:
        tensor: Any two-element iterable ``(x, y)``; the elements only need
            to support ``-``, ``*`` and ``**``.

    Returns:
        The function value at ``(x, y)``, of whatever type the arithmetic
        on the inputs produces.
    """
    x, y = tensor
    distance_from_one = 1 - x
    valley_offset = y - x**2
    return distance_from_one ** 2 + 100 * valley_offset ** 2
def rastrigin(tensor, lib=torch):
    """Evaluate the 2-D Rastrigin function with amplitude ``A = 10``.

    Args:
        tensor: Any two-element iterable ``(x, y)``.
        lib: Module providing a ``cos`` compatible with the element type of
            ``tensor``. Defaults to ``torch``.

    Returns:
        ``2A + (x^2 - A cos(2*pi*x)) + (y^2 - A cos(2*pi*y))``.
    """
    x, y = tensor
    amplitude = 10
    # Per-coordinate contributions; summed left-to-right as in the closed form.
    x_term = x**2 - amplitude * lib.cos(x * math.pi * 2)
    y_term = y**2 - amplitude * lib.cos(y * math.pi * 2)
    return amplitude * 2 + x_term + y_term
def execute_steps(func, initial_state, optimizer_class, optimizer_config, num_iter=500):
    """Run an optimizer on ``func`` and record every visited point.

    Args:
        func: Callable taking the 2-element parameter tensor and returning a
            scalar tensor to minimise.
        initial_state: Two starting coordinates ``(x, y)``.
        optimizer_class: Optimizer constructor, called as
            ``optimizer_class([param], **optimizer_config)``.
        optimizer_config: Keyword arguments forwarded to the optimizer.
        num_iter: Number of optimisation steps to take.

    Returns:
        A ``(2, num_iter + 1)`` NumPy array; column 0 is ``initial_state`` and
        column ``k`` is the parameter value after the k-th step.
    """
    point = torch.Tensor(initial_state)
    point.requires_grad_(True)
    opt = optimizer_class([point], **optimizer_config)

    trajectory = np.zeros((2, num_iter + 1))
    trajectory[:, 0] = np.array(initial_state)

    for completed in range(num_iter):
        opt.zero_grad()
        loss = func(point)
        # create_graph=True makes the backward pass itself differentiable.
        # NOTE(review): presumably kept for optimizers that need higher-order
        # gradient information -- confirm before removing.
        loss.backward(create_graph=True, retain_graph=True)
        # Cap the gradient norm at 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(point, 1.0)
        opt.step()
        trajectory[:, completed + 1] = point.detach().numpy()

    return trajectory
def objective_rastrigin(params):
    """Hyperopt objective: squared distance from the Rastrigin minimum.

    Runs 100 optimisation steps on ``rastrigin`` starting at ``(-2.0, 3.5)``.

    Args:
        params: Mapping with keys ``"lr"`` (learning rate) and
            ``"optimizer_class"`` (optimizer constructor).

    Returns:
        Squared Euclidean distance between the final point and ``(0, 0)``.
    """
    config = {"lr": params["lr"]}
    trajectory = execute_steps(
        rastrigin, (-2.0, 3.5), params["optimizer_class"], config, 100
    )
    target_x, target_y = 0, 0
    final_x = trajectory[0][-1]
    final_y = trajectory[1][-1]
    return (final_x - target_x) ** 2 + (final_y - target_y) ** 2
def objective_rosenbrock(params):
    """Hyperopt objective: squared distance from the Rosenbrock minimum.

    Runs 100 optimisation steps on ``rosenbrock`` starting at ``(-2.0, 2.0)``.

    Args:
        params: Mapping with keys ``"lr"`` (learning rate) and
            ``"optimizer_class"`` (optimizer constructor).

    Returns:
        Squared Euclidean distance between the final point and ``(1.0, 1.0)``.
    """
    config = {"lr": params["lr"]}
    trajectory = execute_steps(
        rosenbrock, (-2.0, 2.0), params["optimizer_class"], config, 100
    )
    target_x, target_y = 1.0, 1.0
    final_x = trajectory[0][-1]
    final_y = trajectory[1][-1]
    return (final_x - target_x) ** 2 + (final_y - target_y) ** 2
def plot_static_image(steps, func_name, optimizer_name, lr, X, Y, Z, minimum):
    """Save a contour plot with the full optimizer path drawn on top.

    Args:
        steps: Array indexed as ``steps[0, :]`` (x values) and
            ``steps[1, :]`` (y values) of the visited points.
        func_name: Name of the test function; ``"rosenbrock"`` selects
            log-spaced contour levels, anything else uses 20 automatic levels.
        optimizer_name: Optimizer name used in the title and file name.
        lr: Learning rate shown in the title.
        X, Y, Z: Grid coordinates and function values passed to ``contour``.
        minimum: Coordinates of the global minimum marker.

    The image is written to ``docs/{func_name}_{optimizer_name}.png``.
    """
    figure, axes = plt.subplots(figsize=(8, 8))

    if func_name == "rosenbrock":
        levels = np.logspace(-0.5, 3.5, 20, base=10)
    else:
        levels = 20
    axes.contour(X, Y, Z, levels, cmap="jet")

    path_x = steps[0, :]
    path_y = steps[1, :]
    axes.plot(path_x, path_y, "r-x", label="Optimizer Path")
    axes.plot(path_x[0], path_y[0], 'go', markersize=10, label='Start')
    axes.plot(path_x[-1], path_y[-1], "rD", markersize=10, label="End")
    axes.plot(*minimum, "gD", markersize=10, label="Global Minimum")
    axes.legend()

    iteration_count = len(path_x) - 1
    axes.set_title(
        f"{func_name.capitalize()} Function: {optimizer_name}\n{iteration_count} iterations, lr={lr:.6f}"
    )

    plt.savefig(f"docs/{func_name}_{optimizer_name}.png")
    plt.close(figure)
def create_animation_with_fading_tail(
    steps, func_name, optimizer_name, lr, X, Y, Z, minimum,
    gif_resolution=256, tail_length=20, fade_length=30
):
    """Render the optimizer path as an animated GIF with a fading trail.

    One frame is rendered per column of ``steps``. Each frame shows the last
    ``tail_length`` segments fully opaque, preceded by up to ``fade_length``
    older segments whose opacity falls off linearly. The frames are written
    with imageio to ``gifs/{func_name}_{optimizer_name}.gif`` and the file is
    then compressed in place with the external ``gifsicle`` tool; a failure
    of that tool only prints a warning.

    Args:
        steps: Array indexed as ``steps[0, i]`` / ``steps[1, i]`` for the x / y
            coordinate at iteration ``i``.
        func_name: Name of the test function; ``"rosenbrock"`` selects
            log-spaced contour levels.
        optimizer_name: Optimizer name used in the title and file name.
        lr: Learning rate shown in the title.
        X, Y, Z: Grid coordinates and function values passed to ``contour``.
        minimum: Coordinates of the global minimum marker.
        gif_resolution: Target frame width/height in pixels (figure is 8 in,
            dpi is ``gif_resolution / 8``).
        tail_length: Number of most recent segments drawn at full opacity.
        fade_length: Number of older segments drawn with decreasing opacity.
    """
    side_inches = 8
    dots_per_inch = gif_resolution / side_inches
    frame_count = steps.shape[1]
    log_levels = func_name == "rosenbrock"
    frames = []

    print(f" - Step 1/3: Rendering {frame_count} frames into memory (with fading tail)...")
    for frame_idx in range(frame_count):
        figure, axes = plt.subplots(figsize=(side_inches, side_inches), dpi=dots_per_inch)

        levels = np.logspace(-0.5, 3.5, 20, base=10) if log_levels else 20
        axes.contour(X, Y, Z, levels, cmap="jet")
        axes.plot(*minimum, "gD", markersize=10, label="Global Minimum")

        # Fully opaque recent part of the path (line plus step markers).
        tail_start = max(0, frame_idx - tail_length)
        recent = steps[:, tail_start : frame_idx + 1]
        axes.plot(recent[0], recent[1], "r-", lw=1.5)
        axes.plot(recent[0], recent[1], "rx", markersize=4)

        # Older segments, newest first, each one more transparent than the last.
        fade_start = max(0, tail_start - fade_length)
        for seg_start in reversed(range(fade_start, tail_start)):
            segments_back = tail_start - seg_start
            opacity = 1.0 - (segments_back / fade_length)
            piece = steps[:, seg_start : seg_start + 2]
            axes.plot(piece[0], piece[1], color='red', lw=1.5, alpha=opacity)

        axes.plot(steps[0, frame_idx], steps[1, frame_idx], "rD", markersize=8, label="Current Position")
        axes.legend()
        axes.set_title(
            f"{func_name.capitalize()} Function: {optimizer_name}\nIteration: {frame_idx}/{frame_count-1}, lr={lr:.6f}"
        )

        # Rasterise the figure and drop the leading alpha channel (ARGB -> RGB).
        figure.canvas.draw()
        raw_argb = figure.canvas.tostring_argb()
        width, height = figure.canvas.get_width_height()
        pixels = np.frombuffer(raw_argb, dtype='uint8').reshape((height, width, 4))
        frames.append(pixels[:, :, 1:])

        plt.close(figure)
        print(f"\r Rendered frame {frame_idx + 1}/{frame_count}", end="")
    print()

    output_path = f"gifs/{func_name}_{optimizer_name}.gif"
    print(" - Step 2/3: Creating initial GIF with imageio...")
    imageio.mimsave(output_path, frames, fps=25)
    size_before = os.path.getsize(output_path) / 1024
    print(f" Initial GIF saved ({size_before:.1f} KB).")

    def _warn_compression_failed():
        print("\n [WARNING] Gifsicle compression failed.")
        print(" Please ensure 'gifsicle' is installed and in your system's PATH.")

    print(" - Step 3/3: Compressing GIF with gifsicle...")
    gifsicle_cmd = ["gifsicle", "-O2", "--colors", "256", "-o", output_path, output_path]
    try:
        subprocess.run(gifsicle_cmd, check=True, capture_output=True, text=True)
        size_after = os.path.getsize(output_path) / 1024
        if size_before > 0:
            reduction = (1 - size_after / size_before) * 100
        else:
            reduction = 0
        print(f" GIF compressed successfully. Size reduced by {reduction:.1f}% to {size_after:.1f} KB.")
    except FileNotFoundError:
        _warn_compression_failed()
    except subprocess.CalledProcessError as failure:
        _warn_compression_failed()
        print(f" Gifsicle stderr: {failure.stderr}")
def execute_experiments(optimizers, objective, func, func_name, plot_params, initial_state, gif_config, seed=1):
    """Tune, run and visualise every optimizer on a single test function.

    For each ``(optimizer_class, lr_low, lr_hi)`` entry the function:

    1. searches for a learning rate with Hyperopt (TPE, 100 evaluations),
    2. runs 500 optimisation steps from ``initial_state`` with that rate,
    3. saves a static path plot under ``docs/``,
    4. saves an animated GIF under ``gifs/``.

    Args:
        optimizers: Sized sequence of ``(optimizer_class, lr_low, lr_hi)``
            tuples; the two bounds are handed to ``hp.loguniform``.
        objective: Callable minimised by Hyperopt; receives a dict with keys
            ``"optimizer_class"`` and ``"lr"``.
        func: Test function to optimise and to evaluate on the plotting grid.
        func_name: Name used in messages, titles and output file names. The
            value ``'rastrigin'`` makes the grid evaluation pass ``lib=np``.
        plot_params: Dict with keys ``'xlim'``, ``'ylim'`` (2-tuples) and
            ``'minimum'`` (marker coordinates).
        initial_state: Starting point for the full 500-step run.
        gif_config: Dict with keys ``'resolution'``, ``'tail_length'`` and
            ``'fade_length'`` forwarded to the animation helper.
        seed: Seed for the NumPy generator given to Hyperopt as ``rstate``.
    """
    total_optimizers = len(optimizers)
    print("=" * 60)
    print(f"STARTING EXPERIMENTS FOR: {func_name.capitalize()} Function")
    print(f"Total optimizers to test: {total_optimizers}")
    print("=" * 60)

    # exist_ok=True replaces the check-then-create pattern, which can fail if
    # the directory appears between the existence check and the creation.
    os.makedirs("docs", exist_ok=True)
    os.makedirs("gifs", exist_ok=True)

    # 250 x 250 evaluation grid for the contour background.
    x = np.linspace(plot_params['xlim'][0], plot_params['xlim'][1], 250)
    y = np.linspace(plot_params['ylim'][0], plot_params['ylim'][1], 250)
    X, Y = np.meshgrid(x, y)
    # rastrigin needs a cos implementation that works on NumPy arrays.
    Z = func([X, Y], lib=np) if func_name == 'rastrigin' else func([X, Y])

    for i, item in enumerate(optimizers):
        optimizer_class, lr_low, lr_hi = item
        optimizer_name = optimizer_class.__name__
        print(f"\n[{i + 1}/{total_optimizers}] PROCESSING: {optimizer_name}")
        print("-" * 40)

        print(" 1. Finding best learning rate with Hyperopt...")
        start_time = time.time()
        # A single-option hp.choice carries the optimizer class to the objective.
        space = {
            "optimizer_class": hp.choice("optimizer_class", [optimizer_class]),
            "lr": hp.loguniform("lr", lr_low, lr_hi),
        }
        best = fmin(
            fn=objective, space=space, algo=tpe.suggest, max_evals=100,
            rstate=np.random.default_rng(seed), verbose=0,
        )
        end_time = time.time()
        print(f" - Best LR found: {best['lr']:.6f} (search took {end_time - start_time:.2f}s)")

        print(" 2. Generating full optimization path...")
        steps = execute_steps(func, initial_state, optimizer_class, {"lr": best["lr"]}, num_iter=500)
        print(" - Path generated.")

        print(" 3. Creating and saving static image...")
        plot_static_image(steps, func_name, optimizer_name, best['lr'], X, Y, Z, plot_params['minimum'])
        print(f" - Static image saved to docs/{func_name}_{optimizer_name}.png")

        print(" 4. Creating and saving animated GIF with fading tail...")
        start_time = time.time()
        create_animation_with_fading_tail(
            steps, func_name, optimizer_name, best['lr'], X, Y, Z, plot_params['minimum'],
            gif_resolution=gif_config['resolution'],
            tail_length=gif_config['tail_length'],
            fade_length=gif_config['fade_length']
        )
        end_time = time.time()
        print(f" - Animation created successfully in {end_time - start_time:.2f} seconds.")
        print(f"--- Finished processing {optimizer_name} ---")
def LookaheadYogi(*a, **kw):
    """Build a Yogi optimizer and wrap it in Lookahead.

    All positional and keyword arguments go to ``optim.Yogi``; the wrapper is
    created with only the base optimizer, so ``optim.Lookahead`` uses its own
    defaults for everything else.
    """
    return optim.Lookahead(optim.Yogi(*a, **kw))
if __name__ == "__main__":
    # Script entry point: run every optimizer on Rastrigin, then on Rosenbrock.
    GIF_CONFIG = dict(resolution=800, tail_length=20, fade_length=30)

    # (optimizer class, lower bound, upper bound) -- bounds go to hp.loguniform.
    optimizers_to_test = [
        (torch.optim.Adamax, -8, 0.5),
        (torch.optim.Adagrad, -8, 0.5),
        (torch.optim.Adadelta, -8, 0.5),
        (torch.optim.RMSprop, -8, -2),
        (torch.optim.Rprop, -8, 0.5),
        (torch.optim.NAdam, -8, -1),
    ]

    plot_params_rastrigin = {'xlim': (-4.5, 4.5), 'ylim': (-4.5, 4.5), 'minimum': (0, 0)}
    plot_params_rosenbrock = {'xlim': (-2, 2), 'ylim': (-1, 3), 'minimum': (1.0, 1.0)}

    # One row per benchmark: objective, function, name, plot settings, start point.
    benchmark_runs = (
        (objective_rastrigin, rastrigin, 'rastrigin', plot_params_rastrigin, (-2.0, 3.5)),
        (objective_rosenbrock, rosenbrock, 'rosenbrock', plot_params_rosenbrock, (-2.0, 2.0)),
    )
    for run_objective, run_func, run_name, run_plot_params, run_start in benchmark_runs:
        execute_experiments(
            optimizers_to_test, run_objective, run_func, run_name,
            run_plot_params, initial_state=run_start, gif_config=GIF_CONFIG
        )

    banner_rule = "=" * 60
    print("\n" + banner_rule)
    print("ALL EXPERIMENTS COMPLETE!")
    print("Check the 'docs' directory for static images and 'gifs' for animations.")
    print(banner_rule)