pdf 转成图片示例代码

发布时间:2026-01-22 17:25:23

1. PDF DPI 知识:PDF 的默认分辨率是 72 DPI,要生成 200 DPI 的图片,需要将页面放大 200/72 ≈ 2.78 倍
2. 图片保存质量优化
添加了 quality 参数(Pillow 默认是 75,下方示例代码中设为 quality=100),大幅提升 JPEG 清晰度
启用optimize=True,在不损失质量的情况下压缩图片

import fitz  # PyMuPDF
import os
'''
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple Pillow
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple PyMuPDF
'''
from PIL import Image
import pymysql
import json

# Module-level MySQL connection shared by every function in this script.
# NOTE(review): host and credentials are hard-coded here.
# NOTE(review): autocommit is not enabled in this call; PyMySQL presumably
# leaves it off by default, so writes need an explicit conn.commit() - confirm.
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    database='xx_xxx_com',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor  # rows are returned as dicts
)

# Single cursor reused for both the SELECT and the UPDATE statements below.
cursor = conn.cursor()

def mysql2():
    """Convert every PDF row that has no images recorded yet.

    Selects the rows of ``abcd_pdf`` whose ``img`` column is NULL, passes each
    row's ``filepath`` and ``id`` to ``image_path``, and prints the file path
    once that row has been handled.
    """
    cursor.execute("SELECT * FROM abcd_pdf where img is null ")
    # Fetch the complete result set before looping: image_path() issues its
    # own statement on the same module-level cursor.
    pending_rows = cursor.fetchall()
    for pending in pending_rows:
        pdf_file, row_id = pending['filepath'], pending['id']
        image_path(pdf_file, row_id)
        print(pdf_file)

def image_path(pdf_path1, id):
    """Render one PDF to JPEG files and record their names on its database row.

    Args:
        pdf_path1: Path of the PDF file to render.
        id: Key of the ``abcd_pdf`` row to update. It is also passed to
            ``convert_pdf_to_images``, which uses it as the file-name prefix.

    The list of file names returned by ``convert_pdf_to_images`` is serialized
    with ``json.dumps`` and written to the row's ``img`` column.
    """
    # Raw string: "\p" is not a valid escape sequence, so the plain literal
    # triggers an invalid-escape warning. The runtime value is unchanged.
    output_folder = r"E:/task\pdf_images"
    # Saving the images fails if the target directory does not exist yet.
    os.makedirs(output_folder, exist_ok=True)

    arr = convert_pdf_to_images(pdf_path1, output_folder, id)
    json_compact = json.dumps(arr)

    # Let the driver quote the values instead of interpolating them into the
    # SQL text: the JSON payload contains quotes and may contain backslashes.
    cursor.execute(
        "update abcd_pdf set img=%s where id=%s",
        (json_compact, id),
    )
    # Persist the update right away so finished rows survive a later failure;
    # without a commit the change is lost when the connection closes.
    conn.commit()


def convert_pdf_to_images(pdf_path, output_folder, id, resolution=200, max_pages=3):
    """Render the leading pages of a PDF as JPEG files.

    Args:
        pdf_path: Path of the PDF file to open.
        output_folder: Directory the JPEG files are written to. It is not
            created here.
        id: Value used as the prefix of every file name
            (``{id}0000{page_number}.jpg``).
        resolution: Target resolution in DPI. PDF pages are defined at
            72 DPI, so each page is scaled by ``resolution / 72``.
        max_pages: Maximum number of pages to render, counted from the first
            page. ``None`` renders every page.

    Returns:
        The generated file names (without ``output_folder``), in page order.
    """
    arr = []
    pdf_document = fitz.open(pdf_path)
    try:
        page_count = len(pdf_document)
        if max_pages is not None:
            page_count = min(page_count, max_pages)

        for page_number in range(page_count):
            page = pdf_document[page_number]

            # Passing dpi is sufficient: get_pixmap derives the zoom factor
            # (dpi / 72) itself and ignores any matrix when dpi is given.
            pix = page.get_pixmap(
                alpha=False,  # no transparency channel, so samples are plain RGB
                dpi=resolution,
            )

            # Wrap the raw RGB samples in a PIL image so Pillow's JPEG
            # encoder options can be used for saving.
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

            filename = f"{id}0000{page_number}.jpg"
            image_filename = f"{output_folder}/{filename}"

            image.save(
                image_filename,
                "JPEG",
                quality=100,
                optimize=True,
            )

            arr.append(filename)
            print(f"Saved {image_filename}")
    finally:
        # Release the file handle even when rendering or saving fails.
        pdf_document.close()

    return arr

if __name__ == '__main__':

    # Script entry point: convert every row selected by mysql2().
    mysql2()

    # Usage example for converting a single file without the database.
    # NOTE(review): this call omits the required `id` argument of
    # convert_pdf_to_images and would fail as written.
    # pdf_path = "D:\BaiduNetdiskDownload\学习\其它/1.pdf"
    # output_folder = "output_images"
    # os.makedirs(output_folder, exist_ok=True)  # make sure the output folder exists
    # convert_pdf_to_images(pdf_path, output_folder, resolution=75)