- 填写csdn的cookie,下载文章用
- 填写csdn文章的id(config字典中的csdn_id)
- 若需上传图片,填写typecho的cookie(config字典中的vuar_cookie),并新建一篇文章后,保存,再次打开这篇文章,上传一张图片,获取上传请求的地址(即upload接口的URL),填入config字典的vuar_url中
import json
import mimetypes
import os
import re
from urllib.parse import urlsplit

import html2text
import requests

# Remember to change vuar_url so it points at your own blog's upload endpoint.
config = {
    'csdn_cookie': '',
    'csdn_url': 'https://blog-console-api.csdn.net/v1/editor/getArticle?id=',
    'csdn_id': '118547334',
    'vuar_cookie': '',
    'vuar_url': 'https://www.vuar.cn/index.php/action/upload?cid=121&_=11fd2ca6cf612bb0ce6b1ec98701bc3a'
}

# Browser-like User-Agent sent with both the CSDN and the blog requests.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36'

# Seconds to wait on any HTTP request; without it a stalled server would
# block the script forever.
_REQUEST_TIMEOUT = 30

# Parenthesised CSDN image link as it appears in the converted markdown.
# [\s\S] lets the match span the line breaks html2text inserts when wrapping.
_IMG_PATTERN = re.compile(r'\(https://img-blog[\s\S]*?\)')

# Characters that cannot be used in a file or directory name on common
# platforms; article titles may contain any of them.
_UNSAFE_PATH_CHARS = re.compile(r'[\\/:*?"<>|]')


def get_html(id):
    """Fetch an article's title and HTML content from the CSDN editor API.

    Args:
        id: Article id as a string; it is appended to config['csdn_url'].
            (The name shadows the builtin but is kept for compatibility.)

    Returns:
        A ``(title, content)`` tuple. Both are empty strings when the request
        fails, the status is not 200, or the payload lacks the expected
        ``data.title`` / ``data.content`` fields.
    """
    url = config['csdn_url'] + id
    headers = {"user-agent": _USER_AGENT, "cookie": config['csdn_cookie']}
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        return '', ''
    if response.status_code != 200:
        return '', ''
    try:
        data = response.json()['data']
        return data['title'] or '', data['content'] or ''
    except (ValueError, KeyError, TypeError):
        # Non-JSON body, missing keys, or a null/non-dict "data" field.
        return '', ''


def get_img(url):
    """Download an image and work out a file extension for it.

    Args:
        url: Absolute URL of the image.

    Returns:
        A ``(suffix, content)`` tuple where ``suffix`` includes the leading
        dot and ``content`` is the raw response body. ``('', '')`` is returned
        when the download fails. ``suffix`` is also empty when no extension
        can be determined, which callers treat as a failure.
    """
    try:
        res = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        return '', ''
    if res.status_code != 200:
        return '', ''
    # Take the extension from the URL path only, so a query string (or its
    # absence) cannot break the lookup.
    suffix = os.path.splitext(urlsplit(url).path)[1]
    if not suffix:
        # No extension in the URL: fall back to the declared media type.
        content_type = res.headers.get('Content-Type', '').split(';')[0].strip()
        suffix = mimetypes.guess_extension(content_type) or ''
    return suffix, res.content


def html_to_md(html):
    """Convert an HTML document string to markdown using html2text."""
    return html2text.HTML2Text().handle(html)


def post_img(local_path):
    """Upload a local image to the blog endpoint in config['vuar_url'].

    Args:
        local_path: Path of the image file to upload.

    Returns:
        The first element of the JSON list returned by the server, or
        ``False`` when the request fails, the status is not 200, or the
        response is not a non-empty JSON list.
    """
    upload_url = config['vuar_url']
    target = urlsplit(upload_url)
    headers = {
        'Cookie': config['vuar_cookie'],
        # Build the Referer from the upload URL's own host so that changing
        # vuar_url does not leave a Referer pointing at a different site.
        'Referer': '{0}://{1}/admin/write-post.php?{2}'.format(
            target.scheme, target.netloc, get_url_cid(upload_url)),
        'User-Agent': _USER_AGENT
    }
    data = {'name': os.path.basename(local_path)}
    try:
        # The context manager closes the file handle even if the post raises.
        with open(local_path, 'rb') as fh:
            res = requests.post(upload_url, data=data, headers=headers,
                                files={'file': fh}, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        print('上传失败')
        return False

    uploaded = None
    if res.status_code == 200:
        try:
            payload = res.json()
        except ValueError:
            payload = None
        if isinstance(payload, list) and payload:
            uploaded = payload[0]

    if uploaded is None:
        print('上传失败')
        return False
    print('上传完成{0}'.format(uploaded))
    return uploaded


def get_url_cid(url):
    """Return the ``cid=<digits>`` fragment of *url*, or '' if there is none."""
    match = re.search(r'cid=[0-9]+', url)
    return match.group() if match else ''


def deal_img(md, title):
    """Re-host every CSDN image referenced in *md*.

    Each inline ``(https://img-blog...)`` target is replaced by a numbered
    reference ``[n]``. The image is downloaded into the *title* directory,
    uploaded via post_img, and a ``[n]: <url>`` definition is appended to the
    document. When the download or the upload fails, the definition points at
    the original URL so the image link still resolves.

    Args:
        md: Markdown text to process.
        title: Directory in which downloaded images are stored.

    Returns:
        The rewritten markdown text.
    """
    if title:
        os.makedirs(title, exist_ok=True)

    references = []
    seen = set()
    index = 0
    for raw in _IMG_PATTERN.findall(md):
        # str.replace below rewrites every occurrence at once, so a repeated
        # URL must not be downloaded and uploaded a second time.
        if raw in seen:
            continue
        seen.add(raw)
        index += 1

        md = md.replace(raw, '[{0}]'.format(index))
        # Drop wrapping newlines and the surrounding parentheses.
        url = raw.replace('\n', '')[1:-1]

        link = url
        suffix, img = get_img(url)
        if suffix:
            img_local_path = os.path.join(title, '{0}{1}'.format(index, suffix))
            with open(img_local_path, 'wb') as f:
                f.write(img)
            # post_img returns False on failure; keep the original URL then.
            link = post_img(img_local_path) or url
        else:
            print('图片下载失败:{0}'.format(url))
        references.append('\n[{0}]: {1}'.format(index, link))

    return md + ''.join(references)


def main():
    """Download the configured CSDN article and save it as markdown.

    The output goes to ``<title>/<title>.md``, with images stored next to it,
    where ``<title>`` is the article title made safe for use as a path.
    """
    title, html = get_html(config['csdn_id'])
    if not title:
        # Nothing to write; creating a directory named '' would raise.
        print('文章获取失败,请检查csdn_cookie与csdn_id')
        return

    # Titles may contain characters that are illegal in paths; fall back to
    # the article id if nothing usable is left.
    folder = _UNSAFE_PATH_CHARS.sub('_', title).strip() or config['csdn_id']
    os.makedirs(folder, exist_ok=True)

    md = deal_img(html_to_md(html), folder)

    with open(os.path.join(folder, '{0}.md'.format(folder)), 'w', encoding="utf-8") as f:
        f.write(md)


if __name__ == '__main__':
    main()