365Tools
发布时间:2026-01-22 17:25:23
PDF DPI知识:PDF的默认分辨率是72DPI,要生成200DPI的图片,需要将页面放大 200/72 ≈ 2.78 倍
2. 图片保存质量优化
添加了quality参数(默认是75;下方代码中实际设置为quality=100),大幅提升JPEG清晰度
启用optimize=True,在不损失质量的情况下压缩图片
import fitz # PyMuPDF
import os
'''
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple Pillow
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple PyMuPDF
'''
from PIL import Image
import pymysql
import json
# Settings for the module-wide MySQL connection used by the functions below.
_DB_SETTINGS = {
    "host": 'localhost',
    "user": 'root',
    "password": 'root',
    "database": 'xx_xxx_com',
    "charset": 'utf8mb4',
    # DictCursor makes every fetched row a dict keyed by column name.
    "cursorclass": pymysql.cursors.DictCursor,
}

# Opened once at import time and shared by every function in this script.
conn = pymysql.connect(**_DB_SETTINGS)
cursor = conn.cursor()
def mysql2():
    """Convert every ``abcd_pdf`` row whose ``img`` column is NULL.

    Each matching row's ``filepath`` and ``id`` are handed to
    ``image_path``; the file path is printed once that row has been
    processed.
    """
    cursor.execute("SELECT * FROM abcd_pdf where img is null ")
    # fetchall() materialises the whole result set before the loop starts,
    # so the shared cursor can be reused while the rows are processed.
    for record in cursor.fetchall():
        source_pdf = record['filepath']
        image_path(source_pdf, record['id'])
        print(source_pdf)
def image_path(pdf_path1, id):
    """Render one PDF to images and store the image file names on its row.

    The PDF is converted with ``convert_pdf_to_images`` and the returned
    list of file names is saved as a JSON array in the ``img`` column of
    the ``abcd_pdf`` row identified by ``id``.

    Args:
        pdf_path1: Path of the PDF file to convert.
        id: Primary key of the ``abcd_pdf`` row; also passed on to
            ``convert_pdf_to_images``, which uses it in the file names.

    Returns:
        None.
    """
    # Same runtime value as before ("E:/task" + backslash + "pdf_images");
    # the backslash is now escaped explicitly because "\p" is an invalid
    # escape sequence that newer Python versions warn about.
    output_folder = "E:/task\\pdf_images"
    arr = convert_pdf_to_images(pdf_path1, output_folder, id)
    json_compact = json.dumps(arr)
    # Let the driver quote the values instead of interpolating them into the
    # SQL text, so quotes or backslashes in the JSON cannot corrupt the query.
    cursor.execute(
        "update abcd_pdf set img=%s where id=%s",
        (json_compact, id),
    )
    # The connection is opened without autocommit, so the update has to be
    # committed explicitly or it is discarded when the connection closes.
    conn.commit()
def convert_pdf_to_images(pdf_path, output_folder, id, resolution=200, max_pages=3):
    """Render the leading pages of a PDF to JPEG files.

    Files are written to ``output_folder`` and named
    ``f"{id}0000{page_number}.jpg"`` with a zero-based page number.

    Args:
        pdf_path: Path of the PDF file to open.
        output_folder: Directory that receives the JPEG files; it is
            created when it does not exist yet.
        id: Value used as the prefix of every generated file name.
        resolution: Target resolution in DPI (default 200).
        max_pages: Maximum number of pages to render, counted from the
            first page (default 3). ``None`` renders every page.

    Returns:
        list[str]: The generated file names (without the folder), in page
        order.
    """
    # Image.save() does not create missing directories, so do it up front.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # PDF pages are measured at 72 units per inch, so resolution / 72 is the
    # zoom factor needed to reach the target DPI. The matrix is identical
    # for every page and is therefore built once.
    zoom = resolution / 72
    mat = fitz.Matrix(zoom, zoom)

    arr = []
    pdf_document = fitz.open(pdf_path)
    try:
        page_count = len(pdf_document)
        if max_pages is not None:
            page_count = min(page_count, max_pages)

        for page_number in range(page_count):
            page = pdf_document[page_number]
            # NOTE(review): both matrix and dpi are passed, as in the
            # original call; PyMuPDF documents that dpi takes precedence
            # when given -- confirm against the installed version.
            pix = page.get_pixmap(
                matrix=mat,
                alpha=False,  # no alpha channel in the rendered pixmap
                dpi=resolution,
            )
            # Assumes the pixmap holds 3-channel RGB samples (alpha is
            # disabled above) -- TODO confirm for non-RGB colorspaces.
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

            filename = f"{id}0000{page_number}.jpg"
            image_filename = f"{output_folder}/{filename}"
            image.save(
                image_filename,
                "JPEG",
                quality=100,
                optimize=True,
            )
            arr.append(filename)
            print(f"Saved {image_filename}")
    finally:
        # Release the document even when rendering or saving fails.
        pdf_document.close()
    return arr
if __name__ == "__main__":
    # Script entry point: convert every abcd_pdf row that has no images yet.
    mysql2()
    # Usage example for converting a single file directly:
    # pdf_path = "D:\BaiduNetdiskDownload\学习\其它/1.pdf"
    # output_folder = "output_images"
    # os.makedirs(output_folder, exist_ok=True)  # make sure the output folder exists
    # convert_pdf_to_images(pdf_path, output_folder, resolution=75)