问题现象
绘制碳(C)原子在 hcp 位点上的 $\Delta E_{\mathrm{ads}}$ 曲线时,在 $z \approx 11 \mathbin{-} 12\ \mathrm{Å}$ 区域出现明显异常波动,曲线形状与原始数据趋势严重不符,看起来像是插值算法"编"出了新的物理结构。
根因分析
检查原始 DFT 数据发现,C 的 hcp 位点在 $z \approx 11.6585\ \mathrm{Å}$ 附近存在一个极高的能量点:
$$ \begin{aligned} z \approx 11.5664\ \mathrm{Å} &\quad \Delta E_{\mathrm{ads}} \approx 2003.75\ \mathrm{eV} \ z \approx 11.6585\ \mathrm{Å} &\quad \Delta E_{\mathrm{ads}} \approx 12533.34\ \mathrm{eV} \ z \approx 11.7507\ \mathrm{Å} &\quad \Delta E_{\mathrm{ads}} \approx 1437.67\ \mathrm{eV} \end{aligned} $$
中间点的能量高达约 $1.25 \times 10^4\ \mathrm{eV}$,相邻两点之间的能量跨度极大,形成了非常陡峭的尖峰。
CubicSpline 三次样条插值强制要求整条曲线二阶连续可导。为了在尖峰两侧满足光滑性约束,样条在峰值前后会额外"摆动",产生原始数据中并不存在的伪极值点——这就是过冲(overshoot)和振荡(ringing)现象。
注意:异常曲线并非数据读取错误,而是插值方法本身在尖峰区域的数学特性所致。
解决方案:PCHIP 形状保持插值
将插值方法从 scipy.interpolate.CubicSpline 替换为 scipy.interpolate.PchipInterpolator。
PCHIP(Piecewise Cubic Hermite Interpolating Polynomial)同样是分段三次插值,但它属于形状保持插值(shape-preserving interpolation)。核心区别在于:
| 特性 | CubicSpline | PchipInterpolator |
|---|---|---|
| 光滑性 | 二阶连续可导 | 一阶连续可导 |
| 过冲控制 | 尖峰区域容易过冲 | 不会产生额外振荡 |
| 对局部形状的尊重 | 较低 | 较高 |
| 适用场景 | 平滑、缓慢变化的数据 | 含尖峰、快速变化或非单调趋势的数据 |
PCHIP 通过约束每个区间内插值多项式的导数符号来保证单调性,因此即便原数据中存在极度尖锐的峰,插值曲线也不会在峰的两侧产生不应有的凹陷或凸起。
代码修改
修改前后对比:
修改前(CubicSpline):
from scipy.interpolate import CubicSpline
spline = CubicSpline(x_array, y_array)
x_smooth = np.linspace(x_array.min(), x_array.max(), SPLINE_POINTS)
y_smooth = spline(x_smooth)
修改后(PchipInterpolator):
from scipy.interpolate import PchipInterpolator
spline = PchipInterpolator(x_array, y_array)
x_smooth = np.linspace(x_array.min(), x_array.max(), SPLINE_POINTS)
y_smooth = spline(x_smooth)
仅需更改导入和实例化的一行代码,即可解决异常振荡问题。
完整脚本
以下是修复后的完整绘图脚本 plot_energy_adatom.py,支持六种元素(C、H、N、O、S、Si)在四种吸附位点(top、bridge、fcc、hcp)上分别绘制 $\Delta E_{\mathrm{ads}}$ 曲线,同时叠加 TA017(纯净表面,实线)和 TA018(含空位表面,虚线)两套数据:
#!/usr/bin/env python3
"""Plot TA017/TA018 delta adsorption energy curves by element."""
from __future__ import annotations
import csv
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from scipy.interpolate import PchipInterpolator
PROJECT_ROOT = Path(__file__).resolve().parents[1]
TA017_DIR = PROJECT_ROOT / "dat/ta017_energy"
TA018_DIR = PROJECT_ROOT / "dat/ta018_energy_vac"
OUT_DIR = PROJECT_ROOT / "viz/energy_adatom"
X_COL = "Adatom z coordinate (angstrom)"
Y_COL = "delta_E_abs_eV"
ELEMENTS = ("C", "H", "N", "O", "S", "Si")
SITES = ("top", "bridge", "fcc", "hcp")
SPLINE_POINTS = 500
AXIS_LIMITS = {
"C": {"x": (10.0, 30.0), "y": (-10.0, 30.0)},
"H": {"x": None, "y": None},
"N": {"x": None, "y": None},
"O": {"x": None, "y": None},
"S": {"x": None, "y": None},
"Si": {"x": None, "y": None},
}
SITE_COLORS = {
"top": "#1f77b4",
"bridge": "#d62728",
"fcc": "#2ca02c",
"hcp": "#9467bd",
}
DATASETS = (
(TA017_DIR, "solid", "{site}", "energy_{element}_{site}.csv"),
(TA018_DIR, "dashed", "{site}_vac", "energy_{element}_{site}_vac.csv"),
)
def configure_matplotlib() -> None:
plt.rcParams.update({
"font.family": "serif",
"mathtext.fontset": "cm",
"axes.unicode_minus": False,
"axes.linewidth": 1.2,
"xtick.direction": "in",
"ytick.direction": "in",
"xtick.top": True,
"ytick.right": True,
"legend.frameon": False,
"savefig.dpi": 400,
})
def read_curve(path: Path) -> tuple[list[float], list[float]]:
with path.open(newline="") as file:
rows = list(csv.DictReader(file))
points = sorted((float(row[X_COL]), float(row[Y_COL])) for row in rows)
return [x for x, _ in points], [y for _, y in points]
def smooth_curve(
x_values: list[float], y_values: list[float]
) -> tuple[np.ndarray, np.ndarray]:
x_array = np.asarray(x_values, dtype=float)
y_array = np.asarray(y_values, dtype=float)
try:
if len(np.unique(x_array)) < 2:
raise ValueError("Interpolation requires at least two unique x values")
spline = PchipInterpolator(x_array, y_array)
x_smooth = np.linspace(x_array.min(), x_array.max(), SPLINE_POINTS)
y_smooth = spline(x_smooth)
return x_smooth, y_smooth
except Exception:
return x_array, y_array
def apply_axis_limits(ax: plt.Axes, element: str) -> None:
limits = AXIS_LIMITS.get(element, {})
x_limits = limits.get("x")
y_limits = limits.get("y")
if x_limits is not None:
ax.set_xlim(*x_limits)
if y_limits is not None:
ax.set_ylim(*y_limits)
def plot_element(element: str) -> Path:
fig, ax = plt.subplots(figsize=(12.8, 7.2))
plotted_any = False
for site in SITES:
for directory, line_style, label_template, filename_template in DATASETS:
path = directory / filename_template.format(element=element, site=site)
if not path.exists():
continue
x_values, y_values = read_curve(path)
x_plot, y_plot = smooth_curve(x_values, y_values)
label = label_template.format(site=site)
ax.plot(
x_plot, y_plot,
color=SITE_COLORS[site],
linestyle=line_style,
linewidth=1.8,
label=label,
)
plotted_any = True
if not plotted_any:
raise FileNotFoundError(f"No curves found for element {element}")
ax.set_xlabel(r"Adatom z coordinate (Å)")
ax.set_ylabel(r"$\Delta E_{\mathrm{abs}}$ (eV)")
ax.set_title(f"{element} adatom delta adsorption energy")
ax.tick_params(which="both", direction="in", top=True, right=True)
ax.grid(False)
apply_axis_limits(ax, element)
legend_handles = []
for site in SITES:
legend_handles.extend([
Line2D([0], [0], color=SITE_COLORS[site], linestyle="solid",
linewidth=2.0, label=site),
Line2D([0], [0], color=SITE_COLORS[site], linestyle="dashed",
linewidth=2.0, label=f"{site}_vac"),
])
ax.legend(handles=legend_handles, title="Site", loc="best", ncol=2)
fig.tight_layout()
OUT_DIR.mkdir(parents=True, exist_ok=True)
out_path = OUT_DIR / f"energy_{element}.png"
fig.savefig(out_path, dpi=400)
plt.close(fig)
return out_path
def main() -> None:
configure_matplotlib()
for element in ELEMENTS:
out_path = plot_element(element)
print(f"Saved {out_path}")
if __name__ == "__main__":
main()经验总结
- 插值方法的选择取决于数据特征:平滑缓慢变化的数据适合 CubicSpline,含尖峰或快速变化的数据应优先使用 PCHIP。
- 过冲不等于数据错误:曲线异常不一定是读取或计算逻辑有 bug,可能是插值算法的数学特性使然。
- SciPy 提供了丰富的插值选项:
PchipInterpolator只需一行替换即可完成切换,修复成本极低。 - 物理直觉是重要的验证手段:DFT 吸附能曲线在近表面处出现 $10^4\ \mathrm{eV}$ 量级的排斥峰是物理合理的,但插值曲线"制造"出的次级峰则明显违背物理直觉——经验判断是发现这类问题的第一道防线。
References
- Virtanen, Pauli, et al. "SciPy 1.0: Fundamental Algorithms for Scientific Computing in Python." Nature Methods, vol. 17, 2020, pp. 261–272. link
- Fritsch, F. N., and R. E. Carlson. "Monotone Piecewise Cubic Interpolation." SIAM Journal on Numerical Analysis, vol. 17, no. 2, 1980, pp. 238–246. link
- Hunter, John D. "Matplotlib: A 2D Graphics Environment." Computing in Science & Engineering, vol. 9, no. 3, 2007, pp. 90–95. link