分类：学习笔记

【Node.js】批量导出微信联系人到Excel表格（基于Wechaty）

用到的库：

wechaty: 一个用于开发微信聊天机器人的框架。

qrcode-terminal: 用于生成终端二维码的库。

xlsx: 用于处理 Excel 文件的库。

fs 和 path: Node.js 的内置模块，fs 用于文件系统操作（创建头像目录），path 用于拼接文件路径。

file-box: 用于处理文件（如图片）的库。

uuid: 用于生成唯一标识符（UUID）的库。

工作原理：

基于wechaty的api，获取相应信息并批量导出到excel表格中，头像文件夹单独放置。可以在WPS中依靠UUID生成的唯一ID来快速批量嵌入头像。

可惜标签功能不在Wechaty的功能中，也没法导出手机号等更有价值的信息，目前能导出的信息不多，图一乐。

const { WechatyBuilder } = require('wechaty');
const qrcode = require('qrcode-terminal');
const xlsx = require('xlsx');
const fs = require('fs');
const path = require('path');
const { FileBox } = require('file-box');
const { v4: uuidv4 } = require('uuid');  // 引入 UUID 库

const outputDir = './avatars/';  // Folder where contact avatars are stored

// Create the avatar folder up front. `recursive: true` makes the call a no-op
// when the folder already exists, so no separate existence check is needed and
// there is no window between "check" and "create".
fs.mkdirSync(outputDir, { recursive: true });

/**
 * Logs into WeChat through Wechaty, waits for the contact list to finish
 * syncing, then exports every individual friend contact to an Excel workbook
 * and saves each contact's avatar into `outputDir`.
 */
class WeChaty {
  bot = null;

  constructor() {
    this.bot = WechatyBuilder.build();
    // Show the login QR code in the terminal.
    this.bot.on('scan', code => {
      qrcode.generate(code, { small: true });
    });
    this.bot.on('login', user => {
      console.log(`登录成功，欢迎 ${user}`);
      // The emitter ignores the handler's return value, so a rejection here
      // would otherwise be an unhandled promise rejection.
      this.waitForContacts().catch(error => {
        console.error('导出联系人信息失败:', error);
      });
    });
  }

  /**
   * Resolve after `ms` milliseconds.
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Poll the contact list every 10 seconds (at most 10 times) until two
   * consecutive polls report the same number of contacts, then start the export.
   * @returns {Promise<void>}
   */
  async waitForContacts() {
    // Start at -1 so the very first poll can never look "unchanged": with an
    // initial 0, a not-yet-synced (empty) list would trigger an immediate export.
    let previousCount = -1;
    let currentCount = 0;
    let attempts = 0;

    while (attempts < 10) {
      const allContacts = await this.bot.Contact.findAll();
      currentCount = allContacts.length;

      if (currentCount === previousCount) {
        console.log('联系人数量没有变化，开始导出');
        break;  // Count is stable: treat the sync as finished
      }

      console.log(`当前联系人数量: ${currentCount}, 尝试次数: ${attempts + 1}`);
      previousCount = currentCount;
      attempts++;

      await this.delay(10000);
    }

    // Attempt limit reached: export whatever has been synced so far.
    if (attempts >= 10) {
      console.log('尝试多次后联系人数量没有变化，开始导出');
    }

    // Awaited so that failures propagate to the caller instead of floating.
    await this.getAllFriendContacts();
  }

  /**
   * Fetch all contacts, keep only individual friends, and export them.
   * @returns {Promise<void>}
   */
  async getAllFriendContacts() {
    const allContacts = await this.bot.Contact.findAll();

    const friendContacts = allContacts.filter(contact =>
      contact.friend() && contact.type() === this.bot.Contact.Type.Individual
    );

    console.log(`总共获取了 ${friendContacts.length} 个好友联系人`);

    if (friendContacts.length > 0) {
      await this.exportContacts(friendContacts);
    }
  }

  /**
   * Save a contact's avatar as `<sanitized name>_<uuid>.jpg` inside `outputDir`.
   * Best effort: any failure is logged and reported as an empty string.
   * @param {object} contact - Wechaty contact.
   * @returns {Promise<string>} File name without extension, or '' when no avatar was saved.
   */
  async saveAvatar(contact) {
    try {
      const avatarFile = await contact.avatar();
      if (avatarFile) {
        // Replace characters that are not allowed in file names.
        const cleanName = contact.name().replace(/[\/\\:*?"<>|]/g, '_');
        // The UUID keeps names unique even when two contacts share a nickname.
        const uniqueName = `${cleanName}_${uuidv4()}`;
        const avatarPath = path.join(outputDir, uniqueName + '.jpg');

        await avatarFile.toFile(avatarPath, true);
        return uniqueName;
      }
    } catch (error) {
      console.log(`获取 ${contact.name()} 头像失败`);
    }
    return '';
  }

  /**
   * Build one spreadsheet row per contact (saving avatars along the way) and
   * write the rows to `contacts_with_details.xlsx`. Errors are logged, not thrown.
   * @param {object[]} allContacts - Contacts to export.
   * @returns {Promise<void>}
   */
  async exportContacts(allContacts) {
    try {
      const rowPromises = allContacts.map(async (contact) => {
        const avatarFileName = await this.saveAvatar(contact);
        const gender = contact.gender();

        // Every contact gets a row, with or without an avatar or nickname.
        return {
          昵称: contact.name().trim() || '无昵称',
          备注: (await contact.alias())?.trim() || '',
          性别: gender === this.bot.Contact.Gender.Male ? '男' :
                gender === this.bot.Contact.Gender.Female ? '女' : '未知',
          省份: contact.province()?.trim() || '',
          城市: contact.city()?.trim() || '',
          文件名: avatarFileName,  // Avatar file name without extension
        };
      });

      // Wait until every avatar has been saved and every row is built.
      const contactData = await Promise.all(rowPromises);

      // The sheet must be built from `contactData`; the previous code referenced
      // an undefined `filteredContactData`, so the export always failed.
      const ws = xlsx.utils.json_to_sheet(contactData);
      const wb = xlsx.utils.book_new();
      xlsx.utils.book_append_sheet(wb, ws, '联系人');

      xlsx.writeFile(wb, 'contacts_with_details.xlsx');
      console.log('好友联系人信息已成功导出到 contacts_with_details.xlsx');
    } catch (error) {
      console.error('导出联系人信息失败:', error);
    }
  }

  /** Start the bot; login triggers the export flow. */
  run() {
    this.bot.start();
  }
}

new WeChaty().run();

2024年11月22日

【Python】按照关键词查找相应PPT

今天巧了，好几个同事问我要PPT，但是他们只能记得起来一些关键词，而我恰好也没有很足的印象，毕竟那是两三年前，还可能不是我做的东西！

WPS只能按照云文档进行查找关键词，那么电脑中几千个PPT要怎么找呢？（没错我电脑里真有2000个PPT (((φ(◎ロ◎;)φ)))）

我们可以根据他们截图里的关键词来查找：先把目录下所有的PPT文件索引出来，这样可以节约一些查找文件的时间；然后对 .pptx 文件使用 python-pptx 库、对旧版 .ppt 文件使用 olefile 库读取内容，找出包含关键词的PPT即可。

import os
from pptx import Presentation
import olefile

def is_powerpoint_file(file_path):
    """Return True when the path ends in .ppt or .pptx (case-insensitive)."""
    lowered = file_path.lower()
    return lowered.endswith((".ppt", ".pptx"))

def index_powerpoint_files(search_dir):
    """Walk search_dir recursively and collect the paths of all PPT/PPTX files.

    Files whose name starts with "~$" (Office lock/temp files) are skipped.
    Prints a summary line and returns the list of matching paths.
    """
    found = []
    seen = 0

    for folder, _subdirs, names in os.walk(search_dir):
        seen += len(names)  # the total counts every file, including skipped ones
        candidates = (os.path.join(folder, name) for name in names if not name.startswith("~$"))
        found.extend(path for path in candidates if is_powerpoint_file(path))

    print(f"[信息] 已索引文件总数：{seen}，PPT文件总数：{len(found)}")
    return found

def search_text_in_pptx(file_path, target_text):
    """Return True if any text-frame shape on any slide contains target_text.

    Any error while opening or reading the file is printed and treated as
    "not found".
    """
    try:
        deck = Presentation(file_path)
        return any(
            shape.has_text_frame and target_text in shape.text
            for slide in deck.slides
            for shape in slide.shapes
        )
    except Exception as e:
        print(f"[错误] 无法处理文件：{file_path}，错误信息：{e}")
        return False

def search_text_in_ppt(file_path, target_text):
    """Return True if a legacy (.ppt, OLE2) file appears to contain target_text.

    The raw "PowerPoint Document" stream is searched at byte level for the
    target encoded as UTF-16LE and, when it is representable, as one byte per
    character (Latin-1).
    NOTE(review): this assumes slide text is stored either as UTF-16LE or as
    single low bytes, which is how the .ppt binary format is commonly
    described - confirm against the format specification.

    Any error while opening or reading the file is printed and treated as
    "not found".
    """
    try:
        if not olefile.isOleFile(file_path):
            return False
        with olefile.OleFileIO(file_path) as ole:
            # listdir() returns a list of path-component lists, so testing
            # `"name" in ole.listdir()` is always False; exists() is the
            # supported way to check for a stream.
            if not ole.exists("PowerPoint Document"):
                return False
            data = ole.openstream("PowerPoint Document").read()
    except Exception as e:
        print(f"[错误] 无法处理文件：{file_path}，错误信息：{e}")
        return False

    # Decoding the binary stream as UTF-8 destroys UTF-16 text (Chinese in
    # particular), so compare encoded bytes instead.
    needles = [target_text.encode("utf-16-le")]
    try:
        needles.append(target_text.encode("latin-1"))
    except UnicodeEncodeError:
        pass  # not representable as single bytes; the UTF-16 form is enough
    return any(needle in data for needle in needles)

def search_text_in_powerpoint_files(ppt_files, target_text):
    """Search every indexed file for target_text and return the matching paths.

    .pptx files go through search_text_in_pptx, .ppt files through
    search_text_in_ppt; progress is printed for each file.
    """
    matches = []
    count = len(ppt_files)

    print(f"[信息] 开始内容搜索，共需处理 {count} 个文件")
    for position, candidate in enumerate(ppt_files, start=1):
        print(f"[处理中] {position}/{count} - 正在处理文件：{candidate}")
        lowered = candidate.lower()
        if lowered.endswith(".pptx"):
            hit = search_text_in_pptx(candidate, target_text)
        elif lowered.endswith(".ppt"):
            hit = search_text_in_ppt(candidate, target_text)
        else:
            hit = False
        if hit:
            matches.append(candidate)

    return matches

if __name__ == "__main__":
    # Script entry point: index every PPT/PPTX under the drive, then list the
    # files that contain the text typed by the user.
    search_dir = "D:\\"
    target_text = input("请输入要查找的文字（支持中文）：")

    print("[信息] 正在索引盘中的PPT文件，请稍候...\n")
    ppt_files = index_powerpoint_files(search_dir)

    if not ppt_files:
        print("\n[信息] 未在指定目录中找到任何PPT文件。")
    else:
        print(f"\n[信息] 索引完成，开始搜索包含 '{target_text}' 的文件...\n")
        matching_files = search_text_in_powerpoint_files(ppt_files, target_text)
        if not matching_files:
            print("\n[结果] 未找到包含该文字的PPT文件。")
        else:
            print("\n[结果] 找到包含目标文字的PPT文件：")
            for file in matching_files:
                print(file)

2024年11月18日

【Python】对彩色LOGO进行批量反白处理

为了制作一些高大上的风格化 PPT，有时我们需要很多客户的反白色LOGO，以符合当下的一些设计潮流。

目前常用的做法是在 PPT中对图片本身进行亮度调整，可以理解为一键过曝，但是这对于一些本身就含有白色的图片不适用，也无法处理JPG的图片，更没法快速将反白的图片进行批量保存，以便存储成库，在其他场景继续使用。

因此使用脚本可以防止原来的白色部分混成一团，预先对原LOGO白色区域进行透明化，然后对其他颜色区域反白。

这个脚本目前适用于我的工作环境，包含一些问题，例如如果原来的图标包含白色文字，这样会将其透明化，因此还需按照使用情况进行调整。

后续考虑加入对JPG进行处理的过程，原理上是对白色部分预先透明度处理，然后后续步骤基本一致，不过使用JPG作为LOGO的客户较少，该功能并不急迫。若要实现该功能，可能需要使用OCR对文字部分预先识别处理，流程上麻烦不少，不过由于wechat-ocr的强大功能，应该也可以稳定呈现，wechat-ocr此前有过一些实践，效果出众，推荐大家使用。

此外脚本尚未测试灰色部分是否会有问题，目前感觉应该会有问题，若使用中有其他问题会随时更新。

import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import random
import string
class ImageProcessor(tk.Tk):
    """Tk window that turns colored PNG logos into white-on-transparent versions.

    Clicking the drop area opens a file picker for one or more PNG files. The
    button rewrites every loaded image in place (pure white becomes fully
    transparent, every other non-transparent pixel becomes white with its
    original alpha) and saves it to the current working directory under a
    random name.
    """

    def __init__(self):
        super().__init__()
        self.title("Logo Image Processor")
        self.geometry("600x600")
        self.image_path = None
        self.images = []  # (source path, RGBA image) pairs currently loaded

        # Preview of the first loaded / processed image
        self.image_label = tk.Label(self)
        self.image_label.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)

        # "Drop" area: highlighted while hovered; a click opens the file picker
        self.drop_area = tk.Label(self, text="拖动PNG图片到此区域", relief="solid", width=30, height=4)
        self.drop_area.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        for sequence, handler in (
            ("<Enter>", self.on_drag_enter),
            ("<Leave>", self.on_drag_leave),
            ("<ButtonRelease-1>", self.on_drop),
        ):
            self.drop_area.bind(sequence, handler)

        # Button that runs the conversion
        self.process_button = tk.Button(self, text="处理图片并保存", command=self.process_images)
        self.process_button.pack(pady=10)

    def on_drag_enter(self, event):
        """Highlight the drop area while the pointer is over it."""
        self.drop_area.config(bg="lightblue")

    def on_drag_leave(self, event):
        """Switch the drop area to a white background when the pointer leaves."""
        self.drop_area.config(bg="white")

    def on_drop(self, event):
        """Ask the user for PNG files and load whatever was selected."""
        chosen = filedialog.askopenfilenames(filetypes=[("PNG files", "*.png")])
        if not chosen:
            return
        self.load_images(chosen)

    def load_images(self, paths):
        """Replace the loaded images with the given files and preview the first one."""
        self.images = []
        for source in paths:
            # RGBA so that every pixel carries an alpha channel
            rgba = Image.open(source).convert("RGBA")
            self.images.append((source, rgba))
        if self.images:
            _first_path, first_image = self.images[0]
            self.display_image(first_image)

    def display_image(self, image):
        """Show the image in the preview label."""
        photo = ImageTk.PhotoImage(image)
        self.image_label.config(image=photo)
        # Keep a reference on the widget so the photo is not garbage-collected
        self.image_label.image = photo

    @staticmethod
    def _whiten(pixel):
        """Map one RGBA pixel: pure white -> transparent, other visible pixels -> white."""
        r, g, b, a = pixel
        if (r, g, b) == (255, 255, 255):
            return (255, 255, 255, 0)
        if a != 0:
            return (255, 255, 255, a)
        return pixel

    def process_images(self):
        """Convert every loaded image in place, save it, and refresh the preview."""
        if not self.images:
            return

        for _source, image in self.images:
            image.putdata([self._whiten(pixel) for pixel in image.getdata()])

            # Random suffix so that successive outputs do not overwrite each other
            alphabet = string.ascii_letters + string.digits
            random_suffix = ''.join(random.choices(alphabet, k=6))
            output_path = f"processed_logo_{random_suffix}.png"
            image.save(output_path)
            print(f"处理后的LOGO图片已保存为 {output_path}")

        # Refresh the preview with the (now processed) first image
        self.display_image(self.images[0][1])
if __name__ == "__main__":
    # Build the window and hand control to Tk's event loop.
    ImageProcessor().mainloop()

此外，还可以对JPG进行处理：

注意保证输入图片的分辨率，其平滑操作对分辨率会有一定的损失。

import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageFilter
import random
import string
import os
class ImageProcessor(tk.Tk):
    """Tk window that whitens PNG/JPG logos and saves them into a chosen folder.

    Near-white pixels (all channels >= 240) become fully transparent, every
    other non-transparent pixel becomes white with its original alpha, and the
    result is blurred with a Gaussian filter before being written as PNG.
    """

    def __init__(self):
        super().__init__()
        self.title("Logo Image Processor")
        self.geometry("400x200")  # main window size
        self.image_path = None
        self.images = []  # (source path, RGBA image) pairs currently loaded
        self.processed_images = []  # paths of the files written so far

        # "Drop" area: highlighted while hovered; a click opens the file picker
        self.drop_area = tk.Label(self, text="拖动PNG或JPG图片到此区域", relief="solid", width=30, height=4)
        self.drop_area.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        for sequence, handler in (
            ("<Enter>", self.on_drag_enter),
            ("<Leave>", self.on_drag_leave),
            ("<ButtonRelease-1>", self.on_drop),
        ):
            self.drop_area.bind(sequence, handler)

        # Button that runs the conversion
        self.process_button = tk.Button(self, text="处理图片并保存", command=self.process_images)
        self.process_button.pack(pady=10)

        # Button that picks the output folder
        self.output_dir = None
        self.select_output_button = tk.Button(self, text="选择保存路径", command=self.select_output_dir)
        self.select_output_button.pack(pady=5)

    def on_drag_enter(self, event):
        """Highlight the drop area while the pointer is over it."""
        self.drop_area.config(bg="lightblue")

    def on_drag_leave(self, event):
        """Switch the drop area to a white background when the pointer leaves."""
        self.drop_area.config(bg="white")

    def on_drop(self, event):
        """Ask the user for image files and load whatever was selected."""
        chosen = filedialog.askopenfilenames(filetypes=[("Image files", "*.png *.jpg *.jpeg")])
        if not chosen:
            return
        self.load_images(chosen)

    def load_images(self, paths):
        """Replace the loaded images with the given files; report files that fail to load."""
        self.images = []
        for source in paths:
            try:
                picture = Image.open(source)
                # JPG has no alpha channel; convert anything that is not RGBA yet
                picture = picture if picture.mode == "RGBA" else picture.convert("RGBA")
                self.images.append((source, picture))
            except Exception as e:
                print(f"无法加载图像 {source}: {e}")
                messagebox.showerror("错误", f"无法加载图像 {source}")

    def select_output_dir(self):
        """Let the user choose the folder that receives the processed files."""
        self.output_dir = filedialog.askdirectory()
        if self.output_dir:
            print(f"选择的输出目录是: {self.output_dir}")

    @staticmethod
    def _whiten(pixel):
        """Map one RGBA pixel: near-white -> transparent, other visible pixels -> white."""
        r, g, b, a = pixel
        if min(r, g, b) >= 240:
            return (255, 255, 255, 0)
        if a != 0:
            return (255, 255, 255, a)
        return pixel

    def process_images(self):
        """Whiten, blur and save every loaded image into the selected folder."""
        if not self.images:
            messagebox.showwarning("警告", "请先加载图片")
            return

        if not self.output_dir:
            messagebox.showwarning("警告", "请先选择保存路径")
            return

        alphabet = string.ascii_letters + string.digits
        for _source, image in self.images:
            # The loaded image itself is rewritten in place
            image.putdata([self._whiten(pixel) for pixel in image.getdata()])

            # Smooth the edges to reduce stray colored pixels
            smoothed = image.filter(ImageFilter.GaussianBlur(radius=2))

            # Random suffix so that successive outputs do not overwrite each other
            random_suffix = ''.join(random.choices(alphabet, k=6))
            output_path = os.path.join(self.output_dir, f"processed_logo_{random_suffix}.png")
            smoothed.save(output_path)
            print(f"处理后的LOGO图片已保存为 {output_path}")
            self.processed_images.append(output_path)
if __name__ == "__main__":
    # Build the window and hand control to Tk's event loop.
    ImageProcessor().mainloop()

2024年11月13日

【Python】西游记取景地复刻图片合成

输入两个图片，进行合成，自动在1图标记1986，2图标记2024，图片对齐，保持没有空白，程序自动复位。

import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageTk, ImageDraw, ImageFont

class ImageCombinerApp:
    """Tk application that stacks two photos vertically and stamps a year on each.

    The first image is labelled "1986", the second "2024". Both are scaled to
    the same width so the composite has no blank margins. After each run the
    selected paths are cleared so a new pair can be uploaded.
    """

    def __init__(self, root):
        """Store the Tk root, reset the selected paths and build the widgets."""
        self.root = root
        self.root.title("图片合成器")

        # Paths of the two images chosen by the user
        self.first_image_path = None
        self.second_image_path = None

        self.create_widgets()

    def create_widgets(self):
        """Create the two upload buttons, the combine button and the preview label."""
        self.btn_upload_first = tk.Button(self.root, text="上传第一张图片", command=self.upload_first_image)
        self.btn_upload_first.grid(row=0, column=0, padx=10, pady=10)

        self.btn_upload_second = tk.Button(self.root, text="上传第二张图片", command=self.upload_second_image)
        self.btn_upload_second.grid(row=0, column=1, padx=10, pady=10)

        self.btn_combine = tk.Button(self.root, text="合成图片", command=self.combine_images)
        self.btn_combine.grid(row=1, column=0, columnspan=2, padx=10, pady=10)

        # Preview area for the combined image
        self.image_panel = tk.Label(self.root)
        self.image_panel.grid(row=2, column=0, columnspan=2, padx=10, pady=10)

    def upload_first_image(self):
        """Ask for the first (top) image and remember its path."""
        # Patterns are space-separated: the ';'-separated form is only
        # understood by the native Windows dialog and matches nothing elsewhere.
        file_path = filedialog.askopenfilename(title="选择第一张图片", filetypes=[("Image files", "*.jpg *.jpeg *.png")])
        if file_path:
            self.first_image_path = file_path
            messagebox.showinfo("图片上传", "第一张图片已成功上传。")

    def upload_second_image(self):
        """Ask for the second (bottom) image and remember its path."""
        file_path = filedialog.askopenfilename(title="选择第二张图片", filetypes=[("Image files", "*.jpg *.jpeg *.png")])
        if file_path:
            self.second_image_path = file_path
            messagebox.showinfo("图片上传", "第二张图片已成功上传。")

    def combine_images(self):
        """Build the stacked composite, save it where the user chooses, then reset.

        When the composite is saved, a thumbnail of it stays in the preview
        panel; when the save dialog is cancelled, the panel is cleared. In both
        cases the two selected paths are cleared afterwards.
        """
        if not self.first_image_path or not self.second_image_path:
            messagebox.showerror("错误", "请先上传两张图片。")
            return

        img1 = Image.open(self.first_image_path)
        img2 = Image.open(self.second_image_path)

        # Scale both images down first (see resize_image)
        img1 = self.resize_image(img1)
        img2 = self.resize_image(img2)

        # Same width for both, heights scaled proportionally
        img1, img2 = self.resize_images_to_same_width(img1, img2)

        # Stamp the year on each image
        self.add_text_to_image(img1, "1986")
        self.add_text_to_image(img2, "2024")

        width1, height1 = img1.size
        width2, height2 = img2.size

        new_image = Image.new('RGB', (width1, height1 + height2), (255, 255, 255))
        new_image.paste(img1, (0, 0))
        new_image.paste(img2, (0, height1))

        output_path = filedialog.asksaveasfilename(defaultextension=".jpg", filetypes=[("JPEG files", "*.jpg"), ("PNG files", "*.png")])
        if output_path:
            new_image.save(output_path)
            messagebox.showinfo("图片合成", f"图片已成功合并并保存到 {output_path}")

            # Keep the preview on screen. It used to be cleared again in the
            # same call, before Tk had a chance to draw it, so it never showed.
            new_image.thumbnail((300, 300))
            tk_image = ImageTk.PhotoImage(new_image)
            self.image_panel.config(image=tk_image)
            self.image_panel.image = tk_image  # keep a reference alive
        else:
            # Nothing was saved: drop any preview left over from an earlier run
            self.image_panel.config(image='')
            self.image_panel.image = None

        self.first_image_path = None
        self.second_image_path = None
        messagebox.showinfo("复位", "程序已复位，可重新上传图片。")

    def resize_image(self, img, max_size=(2000, 2000), max_ratio=0.8):
        """Scale img down so it fits max_size, by a factor of at most max_ratio.

        The factor is min(max_width / width, max_height / height, max_ratio);
        the image is resized only when that factor is below 1.
        """
        width, height = img.size
        max_width, max_height = max_size
        ratio = min(max_width / width, max_height / height, max_ratio)

        if ratio < 1:
            new_width = int(width * ratio)
            new_height = int(height * ratio)
            img = img.resize((new_width, new_height), Image.LANCZOS)
        return img

    def resize_images_to_same_width(self, img1, img2):
        """Resize both images to the smaller of their widths, keeping aspect ratio."""
        width1, height1 = img1.size
        width2, height2 = img2.size

        new_width = min(width1, width2)

        # Heights after proportional scaling
        new_height1 = int(height1 * (new_width / width1))
        new_height2 = int(height2 * (new_width / width2))

        img1 = img1.resize((new_width, new_height1), Image.LANCZOS)
        img2 = img2.resize((new_width, new_height2), Image.LANCZOS)

        return img1, img2

    def add_text_to_image(self, image, text):
        """Draw text in the top-left corner of image (in place) with a dark-red outline."""
        draw = ImageDraw.Draw(image)

        # Font size is 10% of the image width
        image_width = image.size[0]
        font_size = int(image_width * 0.10)

        # Custom font; falls back to PIL's default font when the file is missing
        font_path = r"C:\Users\Lumix\AppData\Local\Microsoft\Windows\Fonts\LCD-BOLD-5.ttf"

        try:
            font = ImageFont.truetype(font_path, font_size)
        except IOError:
            font = ImageFont.load_default()

        text_position = (10, 10)
        text_color = (255, 165, 0)  # orange
        stroke_color = (139, 0, 0)  # dark red outline

        # Fake an outline by drawing the text at diagonal offsets first
        for offset in [-2, 0, 2]:
            draw.text((text_position[0] + offset, text_position[1] + offset), text, fill=stroke_color, font=font)

        # Main text on top
        draw.text(text_position, text, fill=text_color, font=font)

# Create and run the application. Guarded so that importing this module (for
# example to reuse ImageCombinerApp) does not open a window, matching the other
# scripts on this page.
if __name__ == "__main__":
    root = tk.Tk()
    app = ImageCombinerApp(root)
    root.mainloop()

获取西游记的对应图片，可以对小红书已经合成的图片进行裁切，由于我这边看到的很多图都是一比一组合的，因此可以很方便将图片分开：

import sys
import random
import string
from PIL import Image

def generate_random_filename():
    """Return a file name made of 8 random digits followed by '.jpg'."""
    stem = ''.join(random.choices(string.digits, k=8))
    return f"{stem}.jpg"

def split_image(image_path):
    """Split an image into its upper and lower halves and save both as JPEG.

    The halves are written to the current working directory under random
    names. With an odd height, the extra row goes to the lower half.
    """
    with Image.open(image_path) as image:
        # The halves are always written as .jpg, which cannot store alpha or
        # palette data; without this conversion a PNG input raises on save.
        if image.mode not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")

        width, height = image.size
        half_height = height // 2

        upper_half = image.crop((0, 0, width, half_height))
        lower_half = image.crop((0, half_height, width, height))

        upper_filename = generate_random_filename()
        lower_filename = generate_random_filename()
        # Very unlikely, but identical names would make the second save
        # overwrite the first.
        while lower_filename == upper_filename:
            lower_filename = generate_random_filename()

        # Save while the source file is still open
        upper_half.save(upper_filename)
        lower_half.save(lower_filename)

    print(f"图像已成功分割并保存为 {upper_filename} 和 {lower_filename}")

if __name__ == "__main__":
    # The image path arrives as the first command-line argument, e.g. when a
    # file is dragged onto this script.
    if len(sys.argv) >= 2:
        split_image(sys.argv[1])
    else:
        print("请将图像文件拖动到此脚本上运行。")

2024年11月10日

【Python】简单的按照文件类型批量分拣到对应文件夹的脚本

快速分拣你乱乱的桌面

import os
import shutil

# Destination folder name -> file extensions that are sorted into it
file_types = {
    'Images': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'],
    'Documents': ['.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.doc', '.xls', '.ppt'],
    'Videos': ['.mp4', '.avi', '.mkv', '.mov'],
    'Audio': ['.mp3', '.wav', '.flac'],
    'Archives': ['.zip', '.rar', '.7z'],
    'Blender': ['.blend', '.blend1'],  # Blender project files
    'AI源文件': ['.ai']  # Adobe Illustrator files
}

# Folder to tidy up (change this to your own folder)
folder_path = r"D:\2024-9-new"

# Make sure every destination folder exists before anything is moved
for category_dir in (os.path.join(folder_path, name) for name in file_types):
    os.makedirs(category_dir, exist_ok=True)

# Flatten the table into an extension -> folder lookup; setdefault keeps the
# first folder that lists an extension, like a first-match scan of the table.
category_by_extension = {}
for category_name, extension_list in file_types.items():
    for extension in extension_list:
        category_by_extension.setdefault(extension, category_name)

# Move each regular file in the folder into the folder for its extension
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if not os.path.isfile(file_path):
        continue

    category = category_by_extension.get(os.path.splitext(filename)[1].lower())
    if category is None:
        continue  # unknown extension: leave the file where it is

    destination = os.path.join(folder_path, category, filename)
    # Never overwrite a file that already exists at the destination
    if os.path.exists(destination):
        print(f"文件 {filename} 已存在于 {category} 文件夹，跳过移动。")
    else:
        shutil.move(file_path, destination)
        print(f"文件 {filename} 已移动到 {category} 文件夹。")

2024年11月7日

【Python】一个适配本网站性能的压缩图片脚本

由于网站的服务器与带宽性能有限，因此上传的图片被严格限制了边长和大小，使用脚本可以有效对想要上传的文件进行批处理，节省时间。

参考代码：

实现对 jpg、png、webp 等文件的压缩：限制最长边不超过 2560 像素、文件大小不超过 2 MB，输出文件使用随机文件名，并保留图片中可能含有的 EXIF 信息。

暂不支持中文路径文件夹。

import os
import random
import string
from PIL import Image
import tkinter as tk
from tkinterdnd2 import TkinterDnD, DND_FILES

# Random file-name generator
def generate_random_filename(length=10):
    """Return a random string of `length` ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choices(alphabet, k=length))

def compress_image(input_path, max_size=2560, max_file_size=2 * 1024 * 1024, quality=85):
    """Write a size-limited copy of an image next to the original and return its path.

    Args:
        input_path: Path of the source image.
        max_size: Maximum length, in pixels, of the longest side.
        max_file_size: Target maximum size of the output file in bytes.
        quality: Initial save quality; lowered in steps of 10 while the JPEG
            output is still larger than max_file_size.

    Returns:
        Path of the written file. It sits in the source folder, starts with a
        random 10-character prefix and ends with "_无需压缩<ext>" (already
        within limits) or "_已压缩<ext>" / "_已压缩.jpg" (processed).

    Behavior:
        * RGBA images that are not JPEG keep their format so transparency
          survives; they are scaled down when the longest side exceeds max_size.
        * Images already within both limits are re-saved unchanged.
        * Everything else is scaled down if needed and saved as JPEG. Modes
          JPEG cannot store (palette, alpha, ...) are converted to RGB first.
        * EXIF data found in the source is passed through to the output.
    """
    file_extension = os.path.splitext(input_path)[1].lower()
    output_dir = os.path.dirname(input_path)

    with Image.open(input_path) as source:
        exif_data = source.info.get("exif")
        exif_kwargs = {"exif": exif_data} if exif_data else {}
        width, height = source.size
        longest_side = max(width, height)
        file_size = os.path.getsize(input_path)

        random_prefix = generate_random_filename(10)

        def shrink(image):
            """Scale image so its longest side is max_size; no-op when small enough."""
            if longest_side <= max_size:
                return image
            scaling_factor = max_size / float(longest_side)
            new_size = (max(1, int(width * scaling_factor)), max(1, int(height * scaling_factor)))
            return image.resize(new_size, Image.LANCZOS)

        # Transparent non-JPEG images: keep the format, but still enforce the
        # side-length limit (this branch previously skipped resizing).
        if source.mode == 'RGBA' and file_extension not in ['.jpeg', '.jpg']:
            output_path = os.path.join(output_dir, f"{random_prefix}_已压缩{file_extension}")
            shrink(source).save(output_path, quality=quality, optimize=True, **exif_kwargs)
            return output_path

        # Already within both limits: save a copy without recompressing.
        if longest_side <= max_size and file_size <= max_file_size:
            output_path = os.path.join(output_dir, f"{random_prefix}_无需压缩{file_extension}")
            source.save(output_path, **exif_kwargs)
            return output_path

        result = shrink(source)
        # JPEG cannot store palette or alpha modes; saving them used to raise.
        if result.mode not in ("RGB", "L", "CMYK"):
            result = result.convert("RGB")

        output_path = os.path.join(output_dir, f"{random_prefix}_已压缩.jpg")
        result.save(output_path, quality=quality, optimize=True, **exif_kwargs)

        # Still too large: lower the quality step by step.
        while os.path.getsize(output_path) > max_file_size and quality > 10:
            quality -= 10
            result.save(output_path, quality=quality, optimize=True, **exif_kwargs)

    return output_path

# Handle files dropped onto the window
def on_drop(event):
    """Split the dropped-file payload into paths and process them.

    event.data is a Tcl-formatted list in which paths containing spaces are
    wrapped in braces, so a plain str.split() would cut such paths apart. The
    Tcl interpreter's splitlist() parses that format.
    """
    file_paths = list(event.widget.tk.splitlist(event.data))
    process_images(file_paths)

# Batch-process images
def process_images(file_paths):
    """Compress every path that is_image_file accepts and print the output paths."""
    processed_files = [
        compress_image(candidate)
        for candidate in file_paths
        if is_image_file(candidate)
    ]
    print(f"处理完成的文件: {processed_files}")

# Check whether a file is an image
def is_image_file(file_path):
    """Return True if PIL can open the file as an image, False on IOError.

    The image is opened in a `with` block so the underlying file handle is
    closed again right away instead of staying open.
    """
    try:
        with Image.open(file_path):
            return True
    except IOError:
        return False

# Build the GUI: a drag-and-drop capable top-level window from tkinterdnd2
root = TkinterDnD.Tk()
root.title("图片压缩工具")
root.geometry("600x400")

# Hint label shown in the window
label = tk.Label(root, text="将图片拖到这里", padx=20, pady=20)
label.pack(padx=20, pady=20)

# Register the window as a file drop target and route drops to on_drop
root.drop_target_register(DND_FILES)
root.dnd_bind('<<Drop>>', on_drop)

# Enter the Tk event loop
root.mainloop()

2024年11月7日

【Python】调用微信OCR来对输入的图片进行文字识别

参考资料：

1、三年磨一剑——微信OCR图片文字提取-腾讯云开发者社区-腾讯云

2、可供独立使用且最小依赖的微信 OCR 功能包 – 吾爱破解 – 52pojie.cn

3、GitHub – kanadeblisst00/wechat_ocr: 使用Python调用微信本地ocr服务

4、[原创]Python调用微信OCR识别文字和坐标-编程技术-看雪-安全社区|安全招聘|kanxue.com

示例代码：

这段简易代码实现创建一个GUI，输入一个图片，将图片中的文字进行OCR，按段落生成到一个TXT中。

import os
import json
import time
import tkinter as tk
from tkinter import filedialog, messagebox
from wechat_ocr.ocr_manager import OcrManager, OCR_MAX_TASK_ID


def ocr_result_callback(img_path: str, results: dict):
    """Callback registered with the OCR manager: write the results for img_path to a text file."""
    save_text_to_txt(results, img_path)


def save_text_to_txt(ocr_results, original_image_path):
    """Write the recognized text lines to "<image path without extension>_ocr_result.txt".

    ocr_results must contain a list under 'ocrResult'; each entry's non-empty
    'text' value becomes one line of the file. Anything else only prints a
    format warning.
    """
    if not ('ocrResult' in ocr_results and isinstance(ocr_results['ocrResult'], list)):
        print("OCR结果不符合预期格式。")
        return

    texts = (entry.get('text', '') for entry in ocr_results['ocrResult'])
    all_text = "".join(text + "\n" for text in texts if text)

    txt_path = os.path.splitext(original_image_path)[0] + "_ocr_result.txt"
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(all_text)
    print(f"已保存OCR结果到: {txt_path}")


def select_image():
    """Ask for an image, run WeChat OCR on it, and wait until the result is saved.

    The text file is written by ocr_result_callback. The paths to WeChat and
    to the WeChatOCR executable are hard-coded below and must be adjusted to
    the local installation.
    """
    file_path = filedialog.askopenfilename(
        title="选择图片",
        # Space-separated patterns: the ';'-separated form is only understood
        # by the native Windows dialog.
        filetypes=(("Image files", "*.png *.jpg *.jpeg *.bmp *.tiff"), ("All files", "*.*"))
    )
    if not file_path:
        return

    wechat_ocr_dir = r"C:\Users\YourID\AppData\Roaming\Tencent\WeChat\XPlugin\Plugins\WeChatOCR\7079\extracted\WeChatOCR.exe"
    wechat_dir = r"C:\Program Files\Tencent\WeChat\[3.9.12.17]"
    ocr_manager = OcrManager(wechat_dir)
    ocr_manager.SetExePath(wechat_ocr_dir)
    ocr_manager.SetUsrLibDir(wechat_dir)
    ocr_manager.SetOcrResultCallback(ocr_result_callback)
    ocr_manager.StartWeChatOCR()

    try:
        ocr_manager.DoOCRTask(file_path)

        # Poll until the task queue is empty and the OCR service is idle
        while ocr_manager.m_task_id.qsize() > 0 or ocr_manager.IsOcrRunning():
            time.sleep(0.5)
    finally:
        # Always stop the helper process, even if submitting or waiting fails;
        # otherwise it would be left running.
        ocr_manager.KillWeChatOCR()

    messagebox.showinfo("完成", "文字提取并保存完成！")


def main():
    """Open a small window with a single button that starts the OCR flow."""
    window = tk.Tk()
    window.title("OCR文字提取到txt工具")
    window.geometry("300x100")

    # Clicking the button opens the image picker
    tk.Button(window, text="选择图片", command=select_image).pack(pady=20)

    window.mainloop()


# Start the GUI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

2024年11月6日

【Python】本地运行的，以缩略图搜原图脚本

像我这种不太注重整理的人，在想找一张原图时候往往很抓狂，因为文件夹太多了！

因此今天问ChatGPT“协调”了一段Python代码，可以有效的在本地用jpg缩略图来搜索原jpg图。

代码主要用到PIL库，本来想用OpenCV但是实在是搞不定中文路径问题，本着能用就行的原则，因此只能使用PIL，代码如下👇。

from PIL import Image
import os
import numpy as np
import tkinter as tk
from tkinter import messagebox
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def image_similarity(img1, img2):
    """Return a similarity score in (0, 1] for two images; 1.0 means identical.

    Both images are converted to RGB and resized to 100x100, then compared by
    mean squared error (MSE); the score is 1 / (1 + MSE).
    """
    # Same mode for both, so the arrays always have the same shape
    # (e.g. a grayscale and a color picture).
    img1 = img1.convert("RGB").resize((100, 100))
    img2 = img2.convert("RGB").resize((100, 100))

    # Image arrays are uint8: subtracting and squaring them directly wraps
    # around modulo 256 and gives a meaningless MSE. Compute in float instead.
    arr1 = np.asarray(img1, dtype=np.float64)
    arr2 = np.asarray(img2, dtype=np.float64)

    mse = np.mean((arr1 - arr2) ** 2)
    return float(1 / (1 + mse))

def log_image_path(log_path, image_path):
    """Append image_path as one line to the UTF-8 log file at log_path."""
    with open(log_path, mode='a', encoding='utf-8') as handle:
        handle.write(image_path + "\n")

def process_image(file_path, target_image, threshold, retries=3):
    """Compare one file against the target image.

    Returns file_path when the similarity reaches threshold, otherwise None.
    A failure to read or compare is printed and retried after one second, up
    to `retries` attempts in total; if every attempt fails the result is None.
    """
    for _attempt in range(retries):
        try:
            with Image.open(file_path) as candidate:
                score = image_similarity(target_image, candidate)
                if score >= threshold:
                    return file_path
                return None
        except Exception as e:
            print(f"无法读取图片: {file_path}, 错误: {e}")
            time.sleep(1)  # wait a second before the next attempt
    return None

def search_similar_images(target_image_path, search_path, log_path, threshold=0.8):
    """Search search_path recursively for JPEGs similar to the target image.

    Every .jpg/.jpeg file is compared on a thread pool. Matches are appended
    to the log file and shown in a popup, and a completion popup is shown at
    the end.

    Returns:
        The list of matching paths. It is empty when nothing matches or when
        the target image does not exist. Previously only the missing-target
        case returned a list and a normal run returned None.
    """
    if not os.path.exists(target_image_path):
        print("目标图片路径不存在")
        return []

    similar_images = []
    with Image.open(target_image_path) as target_image:
        with ThreadPoolExecutor(max_workers=16) as executor:
            futures = []

            for root, _dirs, files in os.walk(search_path):
                for file in files:
                    if file.lower().endswith(('.jpg', '.jpeg')):
                        file_path = os.path.join(root, file)
                        print(f"尝试读取图片: {file_path}")
                        futures.append(executor.submit(process_image, file_path, target_image, threshold))

            # Collect results as the workers finish; logging happens here, in
            # the calling thread.
            for future in as_completed(futures):
                result = future.result()
                if result:
                    similar_images.append(result)
                    log_image_path(log_path, result)

    if similar_images:
        show_popup(similar_images)
    else:
        print("未找到相似图片")

    show_completion_popup()
    return similar_images

def show_popup(similar_images):
    """Show an info dialog listing the matching paths, using a hidden temporary Tk root."""
    hidden_root = tk.Tk()
    hidden_root.withdraw()  # only the dialog should be visible
    listing = ', '.join(similar_images)
    messagebox.showinfo("找到相似图片", "相似图片: " + listing)
    hidden_root.destroy()

def show_completion_popup():
    """Show a "search finished" info dialog, using a hidden temporary Tk root."""
    hidden_root = tk.Tk()
    hidden_root.withdraw()  # only the dialog should be visible
    title, message = "搜索完毕", "所有图片搜索已完成。"
    messagebox.showinfo(title, message)
    hidden_root.destroy()

# Example usage
if __name__ == "__main__":
    target_image_path = r'D:/xxx.jpg'  # thumbnail whose original is wanted
    search_path = r'F:/'  # folder tree to search
    log_path = r'D:/similar_images_log.txt'  # matches are appended here
    search_similar_images(
        target_image_path=target_image_path,
        search_path=search_path,
        log_path=log_path,
    )

2024年10月29日

分类： 学习笔记

用到的库：

工作原理：

参考资料：

示例代码：

分类：学习笔记