python批量删除WordPress文章的非图片内容，即只保留文章中的图片，删除非图片内容

近期有个网站的文字描述需要大换血，但是图片不想删，毕竟图片都是精选的，步入正题，借助大佬的指导如下方式来实现

在主题的 functions.php 文件中添加以下代码，保存文件后刷新网站，等待页面加载完成即可。注意：使用此代码前务必备份数据库！处理完成后请记得删除这段代码，否则每次加载页面都会重复执行。

// One-off maintenance snippet: for every post assigned to one category,
// replace the post body with only the <img> tags it contains.
// NOTE: placed in functions.php this runs on every request, so remove it
// once it has run. Back up the database first -- the change is destructive.

$image_only_category_id = 59; // Replace with your own category ID.

// Let the query do the category filtering instead of loading every post and
// checking its categories one by one. 'category__in' matches only posts
// assigned directly to this category (child categories are not included).
$image_only_args = array(
    'post_type'      => 'post',
    'posts_per_page' => -1,
    'fields'         => 'ids',
    'category__in'   => array($image_only_category_id),
);

// Prefixed variable names: functions.php is presumably included at global
// scope, where plain $posts / $id would overwrite WordPress globals.
$image_only_post_ids = get_posts($image_only_args);

foreach ($image_only_post_ids as $image_only_post_id) {
    // Raw stored content of the post.
    $image_only_original = get_post_field('post_content', $image_only_post_id);

    // Collect every <img ...> tag; the /i flag also catches <IMG ...>.
    preg_match_all('/<img\b[^>]*>/i', $image_only_original, $image_only_matches);

    // Concatenate the tags with no separator; all other markup and text
    // (including blank lines) is dropped.
    $image_only_content = implode('', $image_only_matches[0]);

    // Nothing to change: skip the write so no needless revision is created.
    if ($image_only_content === $image_only_original) {
        continue;
    }

    // wp_update_post() expects slashed data when given an array, so slash the
    // content to keep any backslashes inside attributes intact.
    wp_update_post(array(
        'ID'           => $image_only_post_id,
        'post_content' => wp_slash($image_only_content),
    ));
}

逻辑是：

1. 获取全部文章ID，保存为数组
2. 循环每个ID
3. 判断这个ID的文章是否属于59分类
4. 如果是，则获取当前ID文章的内容
5. 用正则过滤出其中的图片标签，保存到某个变量
6. 然后把这个变量中的图片标签拼接成字符串，作为新内容保存到当前ID的文章。

由于以上代码，比较麻烦，对CPU要求很高，所以今天更新了一个python的自动化来批量删除WordPress文章的非图片内容

以下PHP代码用于导出对应分类下的文章ID

导出的ID用于后面的批量替换。原本是想直接按分类，把整个分类下的文章都进行删除替换，但是为了更加灵活、可自定义，改为采用文章ID的方法

#注意：要先创建好用于保存文件的目录，否则会因为目录不存在或没有权限而无法写入

// Query the IDs of every post assigned to one category.
$category_query = new WP_Query([
    'post_type'      => 'post',
    'posts_per_page' => -1,
    'fields'         => 'ids',
    'tax_query'      => [
        [
            'taxonomy' => 'category',
            'field'    => 'term_id',
            'terms'    => 59, // Your own category ID.
        ],
    ],
]);

// Absolute path of the output file; fill in your own.
$filepath = '/www/wwwroot/article_ids.txt';

// Open the output file for writing and stop if that is not possible.
$handle = fopen($filepath, 'w');
if ($handle === false) {
    echo "无法打开文件，请检查路径和权限。";
    exit;
}

// Write one post ID per line.
foreach ($category_query->posts as $exported_id) {
    fwrite($handle, $exported_id . "\n");
}

fclose($handle);

echo "文章ID已成功导出到 {$filepath}";

// Reset post data after the custom query.
wp_reset_postdata();

以下是把导出的文章ID（每行一个）转换成逗号分隔字符串的Python代码：

def convert_numbers_to_comma_separated(input_string):
    """Join the non-empty lines of *input_string* with commas.

    Each line is stripped of surrounding whitespace, and blank lines are
    skipped so that leading/trailing newlines (as in a triple-quoted
    literal or an exported ID file) do not produce empty fields such as
    ``",79240,79238,"``.

    Args:
        input_string: Text containing one value per line.

    Returns:
        The values joined by ``,``; an empty string if there are none.
    """
    # splitlines() handles "\n", "\r\n" and a missing final newline alike.
    stripped_lines = (line.strip() for line in input_string.splitlines())
    return ','.join(value for value in stripped_lines if value)

# Sample input: one post ID per line, with a newline before and after.
input_string = "\n".join(["", "79240", "79238", ""])

# Run the conversion on the sample and print what it returns.
output_string = convert_numbers_to_comma_separated(input_string)
print(output_string)

import re
import time

import requests

# Post IDs to process (example values; replace with your own list).
provided_posts_ids = [1365,79240]
# Authorization token captured from the browser session.
my_auth_token = 'Bearer ..-'

headers = {
    'Authorization': my_auth_token
}

# REST endpoint for posts on the target site.
API_BASE = 'https://www.qq.com/wp-json/wp/v2/posts'
# Only posts assigned to this category are rewritten.
TARGET_CATEGORY_ID = 59
# Posts are processed in groups of this size, with a pause between groups
# to avoid hammering the server.
CHUNK_SIZE = 5
PAUSE_SECONDS = 1
# Seconds to wait for a response, so one hung request cannot stall the run.
REQUEST_TIMEOUT = 30
# Matches a whole <img ...> tag, regardless of letter case.
IMG_TAG_RE = re.compile(r'<img\b[^>]*>', re.IGNORECASE)


def extract_img_tags(html):
    """Return all ``<img>`` tags found in *html*, concatenated in order.

    Args:
        html: Post content as a string; ``None`` is treated as empty.

    Returns:
        The matched tags joined with no separator ('' if there are none).
    """
    return ''.join(IMG_TAG_RE.findall(html or ''))


def strip_post_to_images(post_id):
    """Replace one post's content with only its ``<img>`` tags.

    The post is skipped if it cannot be fetched or is not assigned to
    ``TARGET_CATEGORY_ID``. Failures are reported and never raised, so one
    bad post does not abort the whole run.

    Args:
        post_id: ID of the post to process.

    Returns:
        True if the post was updated, False otherwise.
    """
    url = f'{API_BASE}/{post_id}'
    try:
        # context=edit asks for the raw stored content instead of the
        # rendered HTML that has passed through the site's content filters.
        response = requests.get(
            url,
            headers=headers,
            params={'context': 'edit'},
            timeout=REQUEST_TIMEOUT,
        )
        if not response.ok:
            print(f'文章ID {post_id} 获取失败：HTTP {response.status_code}')
            return False
        post_data = response.json()
    except (requests.RequestException, ValueError) as error:
        print(f'文章ID {post_id} 获取失败：{error}')
        return False

    if TARGET_CATEGORY_ID not in post_data.get('categories', []):
        return False

    content = post_data.get('content') or {}
    raw_content = content.get('raw')
    # Fall back to the rendered HTML if the raw field is not present.
    source_html = raw_content if raw_content is not None else content.get('rendered', '')
    updated_content = extract_img_tags(source_html)

    try:
        update_response = requests.post(
            url,
            headers=headers,
            data={'content': updated_content},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as error:
        print(f'文章ID {post_id} 更新失败：{error}')
        return False

    # Only report success when the server accepted the update.
    if not update_response.ok:
        print(f'文章ID {post_id} 更新失败：HTTP {update_response.status_code}')
        return False

    print(f'文章ID {post_id} 处理完成。')
    return True


def main():
    """Process ``provided_posts_ids`` in chunks, pausing between chunks."""
    for start in range(0, len(provided_posts_ids), CHUNK_SIZE):
        if start:
            # Pause before every chunk except the first.
            time.sleep(PAUSE_SECONDS)
        for post_id in provided_posts_ids[start:start + CHUNK_SIZE]:
            strip_post_to_images(post_id)


if __name__ == '__main__':
    main()