批量上传 Typora 图片到图床的脚本

发表于 2025-05-29 本文字数： 15k 阅读时长 ≈ 27 分钟
分享一个脚本，改一改配置项就可以用了~
import os
import re
import requests
import shutil
from pathlib import Path
import time

# --- Configuration ---
# Raw string: the backslashes of the Windows path must be taken literally
# instead of being parsed as (invalid) escape sequences such as "\B" or "\s".
MARKDOWN_DIR = r"E:\BLOG\source\_posts"  # <--- change this to your Typora notes directory!
PICGO_API_URL = "http://127.0.0.1:36677/upload"  # PicGo API address
BACKUP_ORIGINAL_FILES = True  # whether to back up the original Markdown files
BACKUP_SUFFIX = ".backup"     # suffix appended to the backup file name

# --- Regular Expressions ---
# Standard Markdown images: ![alt text](local_path)
# - Group 1: alt text, Group 2: local path.
# - Excludes http/https and data: URIs. IGNORECASE makes that exclusion
#   case-insensitive ("HTTP://..." is online too), matching SRC_ATTR_REGEX.
IMAGE_REGEX = re.compile(
    r"!\[(.*?)\]\((?!https?://|data:)(.*?)\)",
    re.IGNORECASE
)

# To find complete HTML <img> tags.
# - \s after the tag name keeps other tags that merely start with "img"
#   (e.g. <imgfoo ...>) from matching; a tag without attributes has no src anyway.
HTML_TAG_REGEX = re.compile(r"<img\s[^>]*>", re.IGNORECASE)

# To find 'src' attribute with a local path within an HTML tag string.
# - (?<![\w-]): 'src' must be a whole attribute name, so neither 'data-src'
#   nor 'xsrc' is mistaken for it.
# - \s*=\s*: Allows spaces around '='.
# - (["']): Captures the quote character (Group 1).
# - ((?!(?:https?://|data:))[^"'>]+?): Captures the path if it's not an online URL or data URI (Group 2).
#   - [^"'>]+?: Matches characters that are not quotes or '>', non-greedily.
# - \1: Matches the same quote character that opened the attribute value.
SRC_ATTR_REGEX = re.compile(
    r"""(?<![\w-])src\s*=\s*(["'])((?!(?:https?://|data:))[^"'>]+?)\1""",
    re.IGNORECASE
)

def upload_image_to_picgo(image_path):
    """
    Upload one image through the PicGo API (the path is sent as a JSON list).

    Every failure is reported on stdout and turned into a ``None`` result;
    no exception leaves this function.

    Args:
        image_path (str): Absolute path of the local image.
    Returns:
        str or None: The online URL when the upload succeeds, otherwise None.
    """
    try:
        # PicGo server expects a JSON list of absolute paths
        response = requests.post(
            PICGO_API_URL,
            json={"list": [image_path]},
            headers={"Content-Type": "application/json"},
            timeout=30,
        )
        response.raise_for_status()  # 4xx / 5xx answers become HTTPError

        result = response.json()

        # Guard: the server must report success and carry a non-empty result.
        if not (result.get("success") and result.get("result")):
            error_message = result.get('message', '未知错误')
            print(f"    [错误] PicGo 上传失败: {error_message}")
            print(f"    [错误] PicGo 完整响应: {result}")
            return None

        uploaded_items = result["result"]
        if isinstance(uploaded_items, list) and len(uploaded_items) > 0:
            return uploaded_items[0]

        print(f"    [错误] PicGo 返回的 result 格式不正确: {result.get('result')}")
    except requests.exceptions.HTTPError as http_err:
        print(f"    [错误] HTTP 错误发生: {http_err}")
        failed_response = http_err.response
        if failed_response is not None:
            print(f"    [错误] PicGo 服务器响应状态码: {failed_response.status_code}")
            try:
                print(f"    [错误] PicGo 服务器响应内容: {failed_response.text}")
            except Exception:
                # Best effort only: the body is extra diagnostics.
                pass
    except requests.exceptions.RequestException as e:
        # Covers connection errors, ConnectTimeout, ReadTimeout, etc.
        print(f"    [错误] 连接 PicGo API 失败或请求过程中出错: {e}")
    except Exception as e:
        print(f"    [错误] 上传图片 '{image_path}' 时发生未知错误: {e}")
    return None

def original_full_tag_summary(tag_string, max_len=70):
    """Shorten a tag for log output.

    A string no longer than ``max_len`` is returned unchanged; a longer one is
    reduced to its first ``max_len - 3`` characters followed by "...".
    """
    fits = len(tag_string) <= max_len
    return tag_string if fits else f"{tag_string[:max_len - 3]}..."

def _find_local_image(md_dir, raw_path, label):
    """Resolve an image reference to an existing file, or report it as missing.

    The reference is tried as written first and then URL-decoded, because
    editors may store ``my pic.png`` as ``my%20pic.png``. Only regular files
    count: an empty reference resolves to the note's directory, which must
    not be uploaded.

    Args:
        md_dir (Path): Directory of the Markdown file; base for relative paths.
        raw_path (str): The path exactly as it appears in the document.
        label (str): Log prefix ("MD_IMG" or "HTML_IMG").
    Returns:
        Path or None: Absolute path of the image file, or None (after printing
        a warning) when no candidate points to a file.
    """
    from urllib.parse import unquote  # local import keeps the block self-contained

    candidates = [raw_path]
    decoded = unquote(raw_path)
    if decoded != raw_path:
        candidates.append(decoded)

    first_resolved = None
    for candidate in candidates:
        try:
            # Joining with an absolute path yields that path, so one
            # expression covers relative and absolute references alike.
            resolved = (md_dir / candidate).resolve()
            if resolved.is_file():
                return resolved
        except (OSError, ValueError, RuntimeError):
            # Text that is not a usable path on this OS is simply "not found".
            continue
        if first_resolved is None:
            first_resolved = resolved

    shown = first_resolved if first_resolved is not None else raw_path
    print(f"    [警告] {label}: File not found, skipping: {shown}")
    return None


def _upload_once(image_file, uploaded_urls):
    """Upload ``image_file`` unless this run already did; return its URL or None.

    ``uploaded_urls`` maps absolute path strings to online URLs. Only
    successful uploads are remembered, so a failed one is retried on the
    next reference.
    """
    key = str(image_file)
    online_url = uploaded_urls.get(key)
    if online_url is None:
        online_url = upload_image_to_picgo(key)
        if online_url:
            uploaded_urls[key] = online_url
    return online_url


def _rewrite_markdown_images(content, md_dir, uploaded_urls):
    """Replace local ``![alt](path)`` images in ``content`` with online URLs.

    Returns:
        tuple: (new_content, images_found, images_replaced).
    """
    found = 0
    replaced = 0

    def swap(match):
        nonlocal found, replaced
        found += 1
        original_md_tag = match.group(0)
        alt_text = match.group(1)
        local_image_path_str = match.group(2)
        print(f"    MD_IMG: Found: {original_full_tag_summary(original_md_tag)}")

        image_file = _find_local_image(md_dir, local_image_path_str, "MD_IMG")
        if image_file is None:
            return original_md_tag

        online_url = _upload_once(image_file, uploaded_urls)
        if not online_url:
            print(f"    MD_IMG: Upload failed for '{local_image_path_str}', skipping replacement.")
            return original_md_tag

        new_image_md_tag = f"![{alt_text}]({online_url})"
        replaced += 1
        print(f"    MD_IMG: Replaced with: {new_image_md_tag}")
        return new_image_md_tag

    # A callable replacement is inserted literally (no backslash processing).
    return IMAGE_REGEX.sub(swap, content), found, replaced


def _rewrite_html_images(content, md_dir, uploaded_urls):
    """Replace local ``src`` values of ``<img>`` tags in ``content`` with online URLs.

    Returns:
        tuple: (new_content, tags_with_local_src, src_attributes_replaced).
    """
    processed = 0
    replaced = 0

    def swap(tag_match):
        nonlocal processed, replaced
        original_full_tag = tag_match.group(0)
        src_attr_match = SRC_ATTR_REGEX.search(original_full_tag)
        if not src_attr_match:
            return original_full_tag

        processed += 1
        quote_char = src_attr_match.group(1)
        local_image_path_str = src_attr_match.group(2)
        print(f"    HTML_IMG: Found local src='{local_image_path_str}' in tag: {original_full_tag_summary(original_full_tag)}")

        image_file = _find_local_image(md_dir, local_image_path_str, "HTML_IMG")
        if image_file is None:
            return original_full_tag

        online_url = _upload_once(image_file, uploaded_urls)
        if not online_url:
            print(f"    HTML_IMG: Upload failed for '{local_image_path_str}', skipping replacement in tag.")
            return original_full_tag

        new_src_attr_part = f'src={quote_char}{online_url}{quote_char}'
        # Splice at the matched offsets so exactly the attribute that was
        # matched is replaced, even if the same text occurs earlier in the tag.
        modified_tag_output = (
            original_full_tag[:src_attr_match.start()]
            + new_src_attr_part
            + original_full_tag[src_attr_match.end():]
        )
        if modified_tag_output != original_full_tag:
            replaced += 1
            print(f"    HTML_IMG: Replaced src. New tag: {original_full_tag_summary(modified_tag_output)}")
        return modified_tag_output

    return HTML_TAG_REGEX.sub(swap, content), processed, replaced


def process_markdown_file(md_file_path_str):
    """
    Process one Markdown file: upload its local images (Markdown and HTML
    syntax) and replace the links with the returned online URLs.

    The file is rewritten only when its content actually changed. When
    BACKUP_ORIGINAL_FILES is set, a copy named ``<file><BACKUP_SUFFIX>`` is
    made first and is used to restore the file if the write fails. Errors are
    printed; none is raised to the caller for read/write failures.

    Args:
        md_file_path_str (str): Path of the Markdown file to process.
    """
    print(f"--- 正在处理文件: {md_file_path_str} ---")
    md_file_path_obj = Path(md_file_path_str)
    md_dir = md_file_path_obj.parent

    try:
        # newline='' disables newline translation so the file keeps its own
        # line endings when it is written back.
        with open(md_file_path_obj, 'r', encoding='utf-8', newline='') as f:
            original_content = f.read()
    except Exception as e:
        # Deliberately broad: one unreadable file must not stop the batch.
        print(f"  [错误] 读取文件 '{md_file_path_str}' 失败: {e}")
        return

    # Absolute path -> online URL; an image referenced several times in this
    # file is uploaded once.
    uploaded_urls = {}

    # --- Pass 1: Process standard Markdown images ---
    print("  Pass 1: Processing Markdown images `![alt](path)`...")
    content_after_md_pass, markdown_images_found_count, markdown_images_replaced_count = (
        _rewrite_markdown_images(original_content, md_dir, uploaded_urls)
    )
    if markdown_images_found_count > 0:
        print(f"  Pass 1 Summary: Found {markdown_images_found_count} MD images, Replaced {markdown_images_replaced_count}.")
    else:
        print(f"  Pass 1 Summary: No Markdown images `![alt](path)` found.")

    # --- Pass 2: Process HTML <img> tags ---
    print("  Pass 2: Processing HTML images `<img src='path'>`...")
    final_content, html_tags_processed_count, html_src_replaced_count = (
        _rewrite_html_images(content_after_md_pass, md_dir, uploaded_urls)
    )
    if html_tags_processed_count > 0:
        print(f"  Pass 2 Summary: Processed {html_tags_processed_count} <img> tags for local 'src', Replaced {html_src_replaced_count} 'src' attributes.")
    else:
        print(f"  Pass 2 Summary: No HTML <img> tags with processable local 'src' attributes found.")

    # --- Save file only if something changed ---
    if final_content == original_content:
        print(f"  文件 '{md_file_path_str}' 无需修改。")
        print("-" * 30)
        return

    # Stays None until THIS run's backup copy has completed, so a failed
    # backup can never cause an older, stale backup to be "restored" over
    # the still-untouched file.
    backup_file_path = None
    try:
        if BACKUP_ORIGINAL_FILES:
            backup_target = md_file_path_obj.with_suffix(md_file_path_obj.suffix + BACKUP_SUFFIX)
            # Backup the file as it is on disk (original version for this run)
            shutil.copy2(md_file_path_obj, backup_target)
            backup_file_path = backup_target
            print(f"  已备份原始文件到: {backup_file_path}")

        with open(md_file_path_obj, 'w', encoding='utf-8', newline='') as f:
            f.write(final_content)
    except Exception as e:
        # Deliberately broad: report and try to recover instead of aborting the batch.
        print(f"  [错误] 写入文件或备份文件失败: {e}")
        if backup_file_path is not None and backup_file_path.exists():
            try:
                # Attempt to restore from backup if write failed
                shutil.copy2(backup_file_path, md_file_path_obj)
                print(f"  [警告] 写入失败，已尝试从备份 {backup_file_path} 恢复。请检查文件。")
            except Exception as restore_e:
                print(f"  [严重错误] 写入失败且恢复备份也失败: {restore_e}。原始文件可能已损坏，请从手动备份中恢复。")
    else:
        print(f"  文件已更新: {md_file_path_str}")
    print("-" * 30)


def main():
    """
    Walk MARKDOWN_DIR recursively and process every Markdown file in it.

    Prints an error and returns when MARKDOWN_DIR is not a directory. Files
    whose name ends with BACKUP_SUFFIX are treated as backups and skipped.
    """
    markdown_dir_path = Path(MARKDOWN_DIR)
    if not markdown_dir_path.is_dir():
        print(f"[错误] 指定的目录不存在或不是一个目录: {MARKDOWN_DIR}")
        return

    print(f"开始扫描目录: {markdown_dir_path}")
    file_count = 0
    # rglob searches all sub-directories. The matches are collected and sorted
    # up front: the order becomes deterministic and the tree is no longer being
    # walked while processing creates backup files inside it.
    for md_file in sorted(markdown_dir_path.rglob("*.md")):
        if not md_file.is_file():
            # e.g. a directory whose name happens to end in ".md"
            continue
        # Compare the END of the name: a substring test would also skip real
        # notes such as "x.backup.notes.md", and an empty suffix would match
        # (and therefore skip) every file.
        if BACKUP_SUFFIX and md_file.name.endswith(BACKUP_SUFFIX):
            print(f"跳过备份文件: {md_file}")
            continue
        process_markdown_file(str(md_file))
        file_count += 1

    if file_count == 0:
        print("在指定目录中未找到 .md 文件。")
    else:
        print(f"所有 {file_count} 个 Markdown 文件处理完毕。")

if __name__ == "__main__":
    # Script entry point: show the active configuration, ask for an explicit
    # confirmation, then run main() after a short grace period.
    print("*********************************************************************")
    print("* Typora 图片上传脚本 (Markdown & HTML)                              *")
    print("*********************************************************************")
    print("* 重要提示:                                                          *")
    print("* 1. 请确保 PicGo 正在运行并已正确配置图床和Server。                *")
    print("* 2. 脚本将修改 Markdown 文件中的本地图片链接。                     *")
    print(f"* 3. 配置的笔记目录: {MARKDOWN_DIR}                         *")
    print(f"* 4. PicGo API: {PICGO_API_URL}                                 *")
    if BACKUP_ORIGINAL_FILES:
        print(f"* 5. 原始文件将备份为 *.md{BACKUP_SUFFIX}。                         *")
    else:
        print("* 5. 文件备份已禁用。                                               *")
    print("* 6. 强烈建议在首次运行或对重要笔记操作前备份您的整个笔记目录。     *")
    print("*********************************************************************\n")

    # Refuse to run with the placeholder path left unconfigured.
    if MARKDOWN_DIR == "/path/to/your/typora/notes":
        print("[配置错误] 请务必修改脚本中的 `MARKDOWN_DIR`变量，指向你的 Typora 笔记目录！")
        # SystemExit works everywhere; the bare exit() helper is only
        # injected by the `site` module and may be absent.
        raise SystemExit(1)

    try:
        answer = input(f"确认开始处理目录 '{MARKDOWN_DIR}' 下的 Markdown 文件吗? (yes/no): ")
    except EOFError:
        # No interactive stdin: treat it as "not confirmed".
        answer = ""
    # Strip so that "yes " or " YES" is still accepted.
    confirm = answer.strip().lower()

    if confirm == 'yes':
        print("脚本将在 3 秒后开始执行... 按 Ctrl+C 取消。")
        try:
            time.sleep(3)
            main()
        except KeyboardInterrupt:
            print("\n操作已由用户取消。")
        except Exception as e:
            # Top-level boundary: report any unexpected failure instead of a traceback.
            print(f"\n[严重错误] 脚本执行过程中发生意外错误: {e}")
    else:
        print("操作已取消。")