使用前言

  • 填写csdn的cookie,下载文章用
  • 填写csdn文章的id(config字典中的csdn_id)
  • 若需上传图片,填写typecho的cookie;然后新建一篇文章并保存,再次打开这篇文章,上传一张图片,获取上传图片的请求地址,填入config字典的vuar_url中
  • pip安装html2text(html转md用)

    具体代码如下

    import requests
    import json
    import html2text
    import re
    import os
    
    # Script settings. Remember to replace vuar_url with your own upload URL.
    config = dict(
        # Cookie sent with the CSDN article request.
        csdn_cookie='',
        # CSDN article endpoint; the article id is appended to it.
        csdn_url='https://blog-console-api.csdn.net/v1/editor/getArticle?id=',
        # Id of the CSDN article to fetch.
        csdn_id='118547334',
        # Cookie sent with the image upload request.
        vuar_cookie='',
        # Image upload endpoint of the target blog (contains the post's cid).
        vuar_url='https://www.vuar.cn/index.php/action/upload?cid=121&_=11fd2ca6cf612bb0ce6b1ec98701bc3a',
    )
    
    
    def get_html(id):
        """Fetch an article's title and HTML content from the CSDN editor API.

        Args:
            id: Article id; it is converted to a string and appended to
                config['csdn_url'].

        Returns:
            A (title, content) tuple read from the 'data' object of the JSON
            response, or ('', '') when the request does not return HTTP 200
            or the response has no usable 'data' object.
        """
        url = config['csdn_url'] + str(id)
        header = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
            "cookie": config['csdn_cookie']}
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=header, timeout=30)
        if response.status_code != 200:
            return '', ''
        try:
            data = response.json()['data']
            return data['title'], data['content']
        except (ValueError, KeyError, TypeError):
            # Body is not JSON, or 'data' is missing/null (presumably an
            # invalid cookie or article id -- confirm against the API).
            return '', ''
    
    
    def get_img(url):
        """Download an image and determine its file extension.

        The extension is taken from the path component of the URL, so a query
        string (or its absence) does not affect the result.

        Args:
            url: Absolute URL of the image.

        Returns:
            A (suffix, content) tuple where suffix is the extension including
            the leading dot (may be '' if the path has none) and content is
            the raw response body, or ('', '') when the request does not
            return HTTP 200.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlsplit

        # A timeout keeps the script from hanging forever on a stalled connection.
        res = requests.get(url, timeout=30)
        if res.status_code != 200:
            return '', ''
        # Parse the URL instead of regex-matching up to '?': the old pattern
        # returned None (and crashed) for URLs without a query string.
        suffix = os.path.splitext(urlsplit(url).path)[1]
        return suffix, res.content
    
    
    def html_to_md(html):
        """Convert an HTML string to Markdown text using html2text.

        Args:
            html: HTML source to convert.

        Returns:
            The Markdown produced by a default-configured HTML2Text converter.
        """
        converter = html2text.HTML2Text()
        markdown = converter.handle(html)
        return markdown
    
    
    def post_img(local_path):
        """Upload a local image file to the URL in config['vuar_url'].

        Args:
            local_path: Path of the image file to upload.

        Returns:
            The first element of the JSON response on success, or False when
            the request does not return HTTP 200 or the response body cannot
            be read as a non-empty JSON sequence.
        """
        data = {
            'name': os.path.basename(local_path)
        }
        headers = {
            'Cookie': config['vuar_cookie'],
            'Referer': 'https://www.vuar.cn/admin/write-post.php?{0}'.format(get_url_cid(config['vuar_url'])),
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
        }
        # Open the file in a context manager so the handle is always closed,
        # even if the request raises.
        with open(local_path, 'rb') as img_file:
            res = requests.post(config['vuar_url'], data=data, headers=headers,
                                files={'file': img_file}, timeout=60)
        if res.status_code != 200:
            print('上传失败')
            return False
        try:
            # Parse the body once; the uploaded address is its first element.
            uploaded = res.json()[0]
        except (ValueError, LookupError, TypeError):
            # HTTP 200 with a non-JSON or unexpectedly shaped body is
            # reported as a failure rather than crashing the whole run.
            print('上传失败')
            return False
        print('上传完成{0}'.format(uploaded))
        return uploaded
    
    
    def get_url_cid(url):
        """Extract the 'cid=<digits>' fragment from a URL.

        Args:
            url: URL string to search.

        Returns:
            The first 'cid=<digits>' substring found in url, or '' when the
            URL contains no such parameter.
        """
        match = re.search(r'cid=[0-9]+', url)
        # re.search returns None when nothing matches; return an empty string
        # instead of raising AttributeError on None.group().
        return match.group() if match else ''
    
    
    def deal_img(md, title):
        """Re-host the CSDN images referenced in a Markdown document.

        Every '(https://img-blog...)' link target is replaced by a numbered
        reference '[n]'. The image is downloaded into the directory `title`,
        uploaded with post_img, and a '[n]: <url>' definition is appended to
        the document. If the download or upload fails, the definition points
        at the original URL so the image reference still resolves.

        Args:
            md: Markdown text to process.
            title: Existing directory in which downloaded images are stored.

        Returns:
            The Markdown text with link targets replaced and reference
            definitions appended.
        """
        # Raw string: '\(' and '\s' are regex escapes, not string escapes.
        pattern = re.compile(r'\(https://img-blog[\s\S]*?\)')
        # De-duplicate while keeping first-seen order: str.replace below
        # already rewrites every occurrence of a link, so a repeated image
        # needs only one download/upload.
        links = dict.fromkeys(pattern.findall(md))
        for i, link in enumerate(links, start=1):
            # Swap the inline '(url)' target for a numbered reference.
            md = md.replace(link, '[{0}]'.format(i))
            # The matched text may span lines; strip newlines and parentheses.
            url = link.replace('\n', '').lstrip('(').rstrip(')')
            target = url
            suffix, img = get_img(url)
            if suffix:
                img_local_path = '{0}/{1}{2}'.format(title, i, suffix)
                with open(img_local_path, 'wb') as f:
                    f.write(img)
                # post_img returns False on failure; keep the original URL
                # then instead of writing 'False' into the document.
                target = post_img(img_local_path) or url
            else:
                print('图片下载失败:{0}'.format(url))
            # Always emit a definition so '[n]' never dangles.
            md = md + "\n[{i}]: {vuar_url}".format(i=i, vuar_url=target)
        return md
    
    
    def main():
        """Fetch the configured CSDN article and save it as Markdown.

        The article is converted to Markdown, its images are re-hosted via
        deal_img, and the result is written to '<title>/<title>.md'. Nothing
        is written when the article cannot be fetched.
        """
        # Fetch the article title and HTML content.
        title, html = get_html(config['csdn_id'])
        if not title:
            # get_html signals failure with empty strings; os.makedirs('')
            # would raise, so stop with a readable message instead.
            print('文章获取失败,请检查csdn_cookie与csdn_id')
            return
        # Replace characters that are path separators or reserved in file
        # names so the title is usable as a single directory/file name.
        folder = re.sub(r'[\\/:*?"<>|]', '_', title)
        os.makedirs(folder, exist_ok=True)
        # Convert HTML to Markdown.
        md = html_to_md(html)
        # Download the images and point the Markdown at the uploaded copies.
        md = deal_img(md, folder)
        # Write the Markdown file.
        with open('{0}/{0}.md'.format(folder), 'w', encoding="utf-8") as f:
            f.write(md)
    
    # Run the migration only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main()

    上述代码只会自动上传图片并保存md文件,需手动将md文件中的内容粘贴至文章处

最后修改:2022 年 07 月 13 日
如果觉得我的文章对你有用,请随意赞赏