2026-05-09 22:52:09 +01:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
|
|
|
|
|
yopu.co 和弦谱抓取(v2)
|
|
|
|
|
|
|
|
|
|
|
|
跟旧 guitar 版相比,UI 改了:现在是分立的 row:
|
|
|
|
|
|
- "谱面样式" → 选 "功能谱"
|
|
|
|
|
|
- "和弦样式" → 选 "级数名"
|
|
|
|
|
|
- "和弦图" → 默认(不动)
|
|
|
|
|
|
|
|
|
|
|
|
抓取流程:
|
|
|
|
|
|
1. /explore#q=<query> 搜索
|
|
|
|
|
|
2. 找第一个含「和弦谱」字样的结果 → 进 /view/<id>
|
|
|
|
|
|
3. 在 row label = X 的行里,点 button.option 文本 = Y
|
|
|
|
|
|
4. 撑开 div.sheet-container 容器把 overflow / max-height 砍掉,让全部内容渲染
|
|
|
|
|
|
5. 截图整个 container element
|
|
|
|
|
|
6. PIL 裁白边 + padding,存 PNG
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
import time
|
|
|
|
|
|
import logging
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from urllib.parse import quote, urlparse, urljoin
|
|
|
|
|
|
|
|
|
|
|
|
from selenium import webdriver
|
|
|
|
|
|
from selenium.webdriver.chrome.service import Service
|
|
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
|
|
from selenium.common.exceptions import TimeoutException
|
|
|
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup_driver(window="1920,5000"):
    """Build a headless Chrome WebDriver configured for scraping yopu.co.

    Args:
        window: Window size as a ``"width,height"`` string, forwarded to
            Chrome's ``--window-size`` flag.

    Returns:
        A new ``webdriver.Chrome`` instance.

    Environment:
        CHROMEDRIVER_PATH: if set, used as the chromedriver executable.
        CHROME_BIN: if set, used as the Chrome binary location.
    """
    user_agent = (
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    )
    chrome_flags = (
        '--headless=new',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        f'--window-size={window}',
        '--lang=zh-CN',
        user_agent,
    )

    options = Options()
    for flag in chrome_flags:
        options.add_argument(flag)
    # Ask for Chinese content first so the page text matches the labels we look for.
    options.add_experimental_option(
        'prefs', {'intl.accept_languages': 'zh-CN,zh,en-US,en'}
    )

    driver_path = os.getenv('CHROMEDRIVER_PATH')
    service = Service(driver_path) if driver_path else None

    chrome_binary = os.getenv('CHROME_BIN')
    if chrome_binary:
        options.binary_location = chrome_binary

    return webdriver.Chrome(service=service, options=options)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-10 15:10:03 +01:00
|
|
|
|
# JavaScript executed on a yopu result page. Returns one record per
# `a.post-main` card. arguments[0] toggles detection of the "verified" badge.
_COLLECT_HITS_JS = """
    var detectVerified = !!arguments[0];
    var out = [];
    var posts = document.querySelectorAll('a.post-main');
    for (var i = 0; i < posts.length; i++) {
        var p = posts[i];
        var titleEl = p.querySelector('.title-line .title, .title');
        var subEl = p.querySelector('.title-line .subtitle, .subtitle');
        var info = p.querySelector('.one-line-info');
        var snippet = p.querySelector('.nier-snippet');
        var svgTextCount = snippet ? snippet.querySelectorAll('svg text').length : 0;
        // Any descendant whose class contains 'verified' counts
        // (svelte appends a hash to class names).
        var isVerified = detectVerified &&
            p.querySelectorAll('[class*="verified"]').length > 0;
        out.push({
            href: p.href,
            title: titleEl ? (titleEl.textContent || '').trim() : '',
            subtitle: subEl ? (subEl.textContent || '').trim() : '',
            info: info ? (info.textContent || '').trim() : '',
            svgTextCount: svgTextCount,
            isLetters: svgTextCount > 0,
            isFunctional: svgTextCount === 0,
            isVerified: isVerified,
        });
    }
    return out;
"""


def _collect_hits(driver, detect_verified):
    """Run the collector script and drop cards that carry no usable href."""
    raw = driver.execute_script(_COLLECT_HITS_JS, detect_verified) or []
    return [hit for hit in raw if hit.get('href')]


def find_chart(driver, title: str, artist: str, prefer: str = 'functional', *,
               settle_seconds: float = 3.0):
    """Find the best candidate chart page for a song on yopu.co.

    Loads ``/song#title=...&artist=...`` (parameters are passed in the URL
    hash) and, if that page shows no result cards, falls back to
    ``/explore#q=<artist title>``.

    A song usually has several versions. They are told apart by the number
    of SVG ``<text>`` nodes inside each card's ``.nier-snippet`` preview:

    - count > 0  -> letter-chord version (G / Em7 / C)
    - count == 0 -> functional / scale-degree version

    Args:
        driver: Selenium WebDriver (anything exposing ``get``,
            ``execute_script`` and ``current_url``).
        title: Song title.
        artist: Artist name.
        prefer: ``'letters'`` or ``'functional'``; the first card of that
            kind wins. Among equals, cards flagged verified rank first.
            If nothing matches, the best remaining card is used.
        settle_seconds: How long to wait after each page load for the
            results to render.

    Returns:
        ``None`` when no card was found on either page; otherwise a dict
        with keys ``url``, ``title``, ``subtitle``, ``text`` and ``kind``.
        ``kind`` is ``prefer`` on a match, else ``f"{prefer}-fallback"``.
    """
    search_url = (
        f"https://yopu.co/song#title={quote(title)}&artist={quote(artist)}"
    )
    logger.info("loading /song: %s", search_url)
    driver.get(search_url)
    time.sleep(settle_seconds)
    hits = _collect_hits(driver, True)

    if not hits:
        logger.warning("no a.post-main found at /song — fallback to /explore")
        # /song occasionally returns nothing; retry with the free-text search.
        query = (artist + ' ' + title).strip()
        driver.get(f"https://yopu.co/explore#q={quote(query)}")
        time.sleep(settle_seconds)
        # The verified badge is deliberately not read on /explore results.
        hits = _collect_hits(driver, False)
        if not hits:
            return None

    def _matches(hit):
        if prefer == 'letters':
            return bool(hit.get('isLetters'))
        if prefer == 'functional':
            return bool(hit.get('isFunctional'))
        return False

    def _rank(hit):
        # Smaller sorts first: preferred+verified, preferred, verified, rest.
        return (not _matches(hit), not hit.get('isVerified'))

    # min() returns the first minimal element, so page order breaks ties.
    chosen = min(hits, key=_rank)
    kind = prefer if _matches(chosen) else f"{prefer}-fallback"

    # urljoin leaves absolute URLs untouched and resolves root-relative,
    # relative and protocol-relative hrefs against the current page.
    href = urljoin(driver.current_url, chosen['href'])

    logger.info("[%s] %s — %s [%s] verified=%s (total %d, letters=%d, functional=%d, verified=%d)",
                kind, chosen.get('title'), chosen.get('subtitle'), chosen.get('info'),
                chosen.get('isVerified'), len(hits),
                sum(1 for h in hits if h.get('isLetters')),
                sum(1 for h in hits if h.get('isFunctional')),
                sum(1 for h in hits if h.get('isVerified')))
    return {
        'url': href,
        'title': chosen.get('title') or '',
        'subtitle': chosen.get('subtitle') or '',
        'text': chosen.get('info') or '',
        'kind': kind,
    }
|
2026-05-09 22:52:09 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def select_option_in_row(driver, row_label, button_text, timeout=10):
    """Click the option button containing *button_text* in the row labelled *row_label*.

    Returns True when a button was clicked. Returns False when the row or
    the button is missing, or when the click raised; this is not treated as
    an error, since the UI wording may simply have changed.
    """
    row_xpath = (
        f"//div[contains(@class, 'row')][.//div[contains(@class, 'label') "
        f"and contains(normalize-space(.), '{row_label}')]]"
    )
    # Short wait (capped at 3 s): the current yopu UI mostly lacks these
    # rows, so this best-effort step must not stall the overall flow.
    row_wait = WebDriverWait(driver, min(timeout, 3))
    try:
        row = row_wait.until(
            EC.presence_of_element_located((By.XPATH, row_xpath))
        )
    except TimeoutException:
        logger.debug("row '%s' not present (skipped)", row_label)
        return False

    candidates = row.find_elements(By.CSS_SELECTOR, "button.option, button")
    # Only the first button whose visible text contains button_text is tried.
    target = next(
        (b for b in candidates if button_text in (b.text or '').strip()),
        None,
    )
    if target is None:
        logger.debug("button '%s' not found in row '%s'", button_text, row_label)
        return False

    try:
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", target)
        time.sleep(0.3)
        target.click()
        logger.info("clicked '%s' in row '%s'", button_text, row_label)
        # Give the page time to re-render after the option changes.
        time.sleep(1.2)
        return True
    except Exception as e:
        logger.debug("click failed in row '%s' / '%s': %s", row_label, button_text, e)
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def expand_sheet_container(driver, container):
    """Un-clip the sheet container and its ancestors so the full chart renders.

    Walks from *container* up to (but not including) ``document.body``. For
    every node whose computed ``overflow`` / ``overflowY`` is ``hidden`` or
    ``auto``, or whose ``maxHeight`` is not ``none``, it sets overflow to
    visible, removes the max-height and sets height to auto. The container
    then gets a ``min-height`` equal to its ``scrollHeight``.

    Side effects in the page: the container's original inline style is
    stored in its ``data-orig-style`` attribute, and the list of modified
    nodes (with their original inline styles) in ``window.__yopuModified``.

    Returns:
        Whatever the script returns through the driver: an object with
        ``scrollHeight`` (container scroll height after expansion) and
        ``modified`` (number of nodes whose style was changed).
    """
    unclip_script = """
        var c = arguments[0];
        var origStyle = c.getAttribute('style') || '';
        var modified = [];
        var node = c;
        while (node && node !== document.body) {
            var cs = window.getComputedStyle(node);
            if (cs.overflow === 'hidden' || cs.overflow === 'auto'
                || cs.overflowY === 'hidden' || cs.overflowY === 'auto'
                || cs.maxHeight !== 'none') {
                modified.push({ el: node, orig: node.getAttribute('style') || '' });
                node.style.overflow = 'visible';
                node.style.overflowY = 'visible';
                node.style.maxHeight = 'none';
                node.style.height = 'auto';
            }
            node = node.parentElement;
        }
        c.style.overflow = 'visible';
        c.style.maxHeight = 'none';
        c.style.height = 'auto';
        c.style.minHeight = c.scrollHeight + 'px';
        c.offsetHeight; // force reflow
        c.setAttribute('data-orig-style', origStyle);
        window.__yopuModified = modified;
        return { scrollHeight: c.scrollHeight, modified: modified.length };
    """
    dimensions = driver.execute_script(unclip_script, container)
    return dimensions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def crop_white(path, pad_top=20, pad_bottom=50, pad_left=20, pad_right=20, white_th=250):
    """Trim near-white margins from the image at *path* and overwrite it as PNG.

    A pixel is "white" when all of R, G and B exceed *white_th*. A row or
    column counts as content when fewer than 99% of its pixels are white.
    Columns are only measured between the first and last content rows.

    Args:
        path: Image file to crop in place.
        pad_top, pad_bottom, pad_left, pad_right: Margin in pixels kept
            around the detected content, clamped to the image bounds.
        white_th: Per-channel threshold above which a pixel is white.

    Returns:
        None. If no content row is found (blank image) the file is left
        untouched.
    """
    # Copy the pixels into memory and close the source file before the same
    # path is overwritten below. convert() returns a new image even when the
    # mode is already RGB.
    with Image.open(path) as source:
        img = source.convert('RGB')
    w, h = img.size
    if w == 0 or h == 0:
        return
    px = img.load()

    def is_white(x, y):
        r, g, b = px[x, y]
        return r > white_th and g > white_th and b > white_th

    def row_has_content(y):
        white = sum(1 for x in range(w) if is_white(x, y))
        return white / w < 0.99

    def col_has_content(x, y0, y1):
        white = sum(1 for y in range(y0, y1) if is_white(x, y))
        return white / max(1, y1 - y0) < 0.99

    top = next((y for y in range(h) if row_has_content(y)), None)
    if top is None:
        # Entirely white: nothing to crop. (The previous `top >= bottom`
        # guard could never fire because the defaults were 0 and h.)
        logger.info("image is all white, left uncropped: %s", path)
        return
    # A content row exists, so the reverse scan ends at `top` at the latest.
    bottom = next(
        (y for y in range(h - 1, -1, -1) if row_has_content(y)), top
    ) + 1

    # Sparse content (e.g. a few thin lines in a tall band) may leave every
    # column above 99% white; keep the full width in that case.
    left = next((x for x in range(w) if col_has_content(x, top, bottom)), 0)
    right = next(
        (x for x in range(w - 1, -1, -1) if col_has_content(x, top, bottom)),
        w - 1,
    ) + 1

    box = (
        max(0, left - pad_left),
        max(0, top - pad_top),
        min(w, right + pad_right),
        min(h, bottom + pad_bottom),
    )
    img.crop(box).save(path, 'PNG')
    logger.info("cropped to %s", box)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-09 23:02:34 +01:00
|
|
|
|
# Directory that receives failure snapshots (HTML dump + screenshot).
DEBUG_DIR = Path('/data/chord-debug')


def _save_debug(driver, tag: str):
    """Dump the current page HTML and a screenshot into DEBUG_DIR.

    Files are named ``<tag>-<unix seconds>.html`` and ``.png``. Any failure
    while saving is logged as a warning and otherwise ignored, so this
    helper never raises for ordinary errors.
    """
    try:
        DEBUG_DIR.mkdir(parents=True, exist_ok=True)
        stamp = int(time.time())
        html_file = DEBUG_DIR / f'{tag}-{stamp}.html'
        png_file = DEBUG_DIR / f'{tag}-{stamp}.png'
        html_file.write_text(driver.page_source, encoding='utf-8')
        driver.save_screenshot(str(png_file))
        logger.info("debug snapshot saved: %s/%s-%d.{html,png}", DEBUG_DIR, tag, stamp)
    except Exception as e:
        logger.warning("debug snapshot failed: %s", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-10 15:10:03 +01:00
|
|
|
|
def fetch_chord_chart(title: str, artist: str, output_path: str, *,
                      mode: str = 'functional',
                      sheet_style: str = '功能谱',
                      chord_style: str = '级数名',
                      verbose: bool = False) -> tuple[bool, str]:
    """Search yopu for a song, open the chosen chart and save a screenshot.

    Args:
        title: Song title.
        artist: Artist name.
        output_path: Destination PNG path; parent directories are created.
        mode: ``'functional'`` for the scale-degree version, ``'letters'``
            for the letter-chord version. Passed to ``find_chart`` as
            ``prefer``.
        sheet_style: Button text to select in the "谱面样式" row, if present.
        chord_style: Button text to select in the "和弦样式" row, if present.
        verbose: Configure logging at DEBUG instead of INFO.

    Returns:
        ``(True, output_path)`` on success, otherwise ``(False, reason)``.
        This function does not raise for ordinary failures; they are
        logged and reported through the return value.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s',
    )

    driver = None
    try:
        driver = setup_driver()
        result = find_chart(driver, title, artist, prefer=mode)
        if not result:
            _save_debug(driver, 'no-search-hit')
            return False, '未找到和弦谱'
        view_url = result['url']

        logger.info("loading view: %s", view_url)
        driver.get(view_url)
        time.sleep(3)

        # The old yopu view page had "谱面样式 / 和弦样式" rows to switch
        # styles. The new site removed them (switching needs the logged-in
        # app), so the desired version is picked at search time instead.
        # Still try here on a best-effort basis; a missing row is not an error.
        select_option_in_row(driver, '谱面样式', sheet_style)
        select_option_in_row(driver, '和弦样式', chord_style)

        # Let the content refresh.
        time.sleep(1.5)

        try:
            sheet = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "div.sheet-container")
                )
            )
        except TimeoutException:
            _save_debug(driver, 'no-sheet-container')
            # Return an explicit reason: str(TimeoutException) carries no
            # useful text when the wait was given no message.
            logger.error("div.sheet-container not found at %s", view_url)
            return False, '未找到谱面容器'

        driver.execute_script("arguments[0].scrollIntoView(true);", sheet)
        time.sleep(0.5)

        # Tolerate a missing result so a debug log cannot abort the fetch.
        dims = expand_sheet_container(driver, sheet) or {}
        logger.debug("expanded scrollHeight=%s, modified=%s ancestors",
                     dims.get('scrollHeight'), dims.get('modified'))
        time.sleep(1.5)

        # incrButton enlarges the font / chord size; click it 3 times as the
        # old version did. Failure here only costs readability.
        try:
            buttons = driver.find_elements(By.CSS_SELECTOR, "button.incrButton")
            if buttons:
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", buttons[0])
                time.sleep(0.3)
                for _ in range(3):
                    buttons[0].click()
                    time.sleep(0.4)
        except Exception as e:
            logger.warning("incrButton failed: %s", e)

        time.sleep(1.0)

        # Scroll the sheet's own content back to the top, then capture the
        # whole container element.
        driver.execute_script("arguments[0].scrollTop = 0;", sheet)
        time.sleep(0.4)

        out = Path(output_path)
        out.parent.mkdir(parents=True, exist_ok=True)
        sheet.screenshot(str(out))
        if not out.exists() or out.stat().st_size < 100:
            return False, '截图为空'
        logger.info("screenshot: %s (%d bytes)", out, out.stat().st_size)

        # Cropping is cosmetic; keep the uncropped screenshot if it fails.
        try:
            crop_white(str(out))
        except Exception as e:
            logger.warning("crop failed: %s", e)

        return True, str(out)
    except Exception as e:
        logger.error("fetch failed: %s", e, exc_info=True)
        return False, str(e)
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Shutdown problems must not mask the real outcome, but
                # leave a trace for diagnosing leaked browser processes.
                logger.debug("driver.quit() failed: %s", e)
|