首页 编程教程正文

酷狗音乐爬虫

piaodoo 编程教程 2020-02-22 22:17:17 81 0 python教程

本文来源吾爱破解论坛

本帖最后由 Hao_Tian22 于 2020-2-22 14:29 编辑

py瞎写的酷狗爬虫,函数调用有点乱,带简单的GUI
有什么改进意见或者bug什么的可以提出来
第一次使用需要先自行设置cookies,没有cookies很容易被拒绝
一次性爬取太多也会出问题
爬取过程中会在当前目录下生成"音乐"和"数据"文件夹用来储存,请选择好目录再使用
还有……貌似大多数歌曲只能爬标准品质
还有个在下载界面中点关闭会报错的bug,懒得修了

下面是打包好的文件(没安装的模块会自动安装,调用pip在线安装)
嗨学网 [python]酷狗音乐下载GUI版.7z (5.47 KB, 下载次数: 6) 2020-2-22 12:26 上传 点击文件名下载附件
源代码(包括自动安装模块)
下载积分: 吾爱币 -1 CB

源代码:
第一个文件(主文件)
[Python] 纯文本查看 复制代码

import os
import easygui as eg

# Prepare the working directories and data files the sibling modules read at
# import time.
if os.path.isdir('音乐'):
    print('检测到音乐文件夹已存在')

# Create each folder independently: previously a single try block wrapped both
# mkdir calls, so an already existing '音乐' folder skipped the creation of
# '数据' and the open() calls below crashed with FileNotFoundError.
for folder in ('音乐', '数据'):
    os.makedirs(folder, exist_ok=True)

# Opening in append mode creates a missing file without truncating an
# existing one.
for data_file in ('数据/cookies.txt', '数据/歌单列表.txt', '数据/歌单哈希值列表.txt'):
    with open(data_file, 'a'):
        pass
from 下载器 import *
from 歌单获取器 import *
from 歌词格式转换器 import *


def download():
    """Run the interactive main menu until the user stops.

    Each round shows a mode chooser and dispatches to the matching helper
    (playlist download, search by name, download by hash, batch download
    from the hash list file, lyric re-encoding, or cookie update). After the
    action, a second dialog decides whether to show the menu again, open the
    music folder, or quit.

    The menu is a loop rather than a recursive call so that a long session
    cannot hit the recursion limit. Cancelled dialogs (easygui returns
    ``None``) skip the action instead of raising.
    """
    mode_list = ['下载整个歌单', '根据歌曲名称下载', '根据哈希值下载', '导入文件批量下载', '转换utf-8为gbk', '更新cookies']
    quality_list = ['标准(大部分允许下载)', '高品(很少有允许下载)', '超高品(不允许)', '无损(不允许)']

    while True:
        mode = eg.choicebox(msg='请选择下载模式', title='选择模式', choices=mode_list)

        if mode == '下载整个歌单':
            get_song_list()

            if eg.ynbox(msg='歌单获取完成,已保存在<歌单列表.txt>,是否一键下载?', title='一键下载', choices=['是', '否']):
                download_list()
        elif mode == '根据歌曲名称下载':
            quality = eg.choicebox(choices=quality_list, msg='选择音质')
            # A cancelled dialog yields None; previously this raised KeyError.
            if quality is not None:
                # download_name expects the 1-based position of the quality.
                download_name(quality_list.index(quality) + 1)
        elif mode == '根据哈希值下载':
            song_hash = eg.enterbox('请输入哈希值')
            # Skip when the prompt was cancelled or left blank.
            if song_hash and song_hash.strip():
                song_info = download_hash(song_hash.strip(), True)
                want_lyrics = eg.boolbox('是否下载歌词?', choices=['是', '否'])
                lyrics(song_info, want_lyrics)
        elif mode == '导入文件批量下载':
            download_list()
        elif mode == '转换utf-8为gbk':
            utf8_to_gbk()
        elif mode == '更新cookies':
            with open('数据/cookies.txt', 'r') as f:
                cookies_old = f.read()
            cookies = eg.textbox('输入cookies,可在浏览器酷狗音乐页面按f12寻找\n下面的是原来的cookies,请删除后更改', '更新cookies', cookies_old)
            # Store the cookie string without surrounding whitespace so a
            # trailing newline from the text box cannot end up in a value.
            if cookies and cookies.strip():
                with open('数据/cookies.txt', 'w') as f:
                    f.write(cookies.strip())

        else_mode = eg.choicebox(msg='本次操作已完成,是否进行其他操作', choices=['继续使用', '打开文件夹', '关闭程序'])
        if else_mode == '打开文件夹':
            os.system("explorer 音乐\n")
        if else_mode != '继续使用':
            # Anything but "continue" (including a cancelled dialog) ends the loop.
            break

# Start the interactive menu as soon as the script runs.
download()



第二个文件(下载)
[Python] 纯文本查看 复制代码
import json
from urllib import parse
import requests
import time
import easygui as eg

# Browser-like request headers sent with every HTTP call made by this module.
headers = dict()
headers['user-agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/81.0.4023.0 Safari/537.36 Edg/81.0.396.0'
)

# Raw cookie string, read once when the module is imported.
with open('数据/cookies.txt', mode='r') as f:
    cookies = f.read()


# cookies='kg_mid=9526c011844091cafd12889f2c7e6ae6; _WCMID=164540295d9227c4c1934f5a; kg_dfid=10C0M20bBwoB0ROC2j3kckWa; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1579179321,1580200379,1580210126; kg_mid_temp=9526c011844091cafd12889f2c7e6ae6; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1580212346'

def str_to_dict(cookies):
    """Convert a ``name=value; name=value`` cookie string into a dict.

    All whitespace (spaces, tabs, newlines) is removed from every segment.
    Segments that are empty or lack a ``name=`` part are skipped, so an empty
    cookie file or a trailing ``;`` yields a partial or empty dict instead of
    raising ``ValueError``.

    Args:
        cookies: Cookie header text, segments separated by ``;``.

    Returns:
        dict mapping cookie names to values. Only the first ``=`` of a
        segment separates name from value.
    """
    cookies_dict = {}
    for segment in cookies.split(';'):
        # Drop every whitespace character, not only plain spaces.
        segment = ''.join(segment.split())
        key, separator, value = segment.partition('=')
        if not separator or not key:
            continue
        cookies_dict[key] = value
    return cookies_dict


# Characters replaced by a space when a song title is used as a file name.
_INVALID_FILE_NAME_CHARS = '"?/*:\\|<>'


def _notify(message, is_GUI, **box_options):
    """Show *message* in an easygui message box (GUI mode) or print it."""
    if is_GUI:
        eg.msgbox(msg=message, **box_options)
    else:
        print(message)


def download_hash(song_hash, is_GUI):
    """Download the song identified by *song_hash* into the '音乐' folder.

    Queries the play-data endpoint, dumps the parsed response to
    '数据/log.txt', sanitises the title for use as a file name, and saves the
    audio as '音乐/<title>.mp3' (prefixed with '[试听]' for preview-only
    tracks).

    Args:
        song_hash: Hash string identifying the song.
        is_GUI: When true, results are reported through easygui message
            boxes; otherwise they are printed.

    Returns:
        The parsed response dict, with ``data.audio_name`` already sanitised,
        or ``None`` when the response carries no play URL. This now holds in
        both GUI and non-GUI mode; previously non-GUI mode created an empty
        .mp3 and reported it as already existing.

    Raises:
        SystemExit: When the response reports ``status == 0``.
    """
    # Local import: this module's import block does not provide os.
    import os

    url_json2 = 'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191044011229047114075_1566198263706&hash={}'.format(
        song_hash)
    page2 = requests.get(url=url_json2, headers=headers, cookies=str_to_dict(cookies)).text
    # The body is JSONP, "callback({...});". Take what lies between the
    # outermost parentheses instead of relying on fixed character offsets.
    song_json2 = json.loads(page2[page2.find('(') + 1:page2.rfind(')')])
    with open('数据/log.txt', 'w', encoding='utf-8') as log:
        log.write(str(song_json2))
    if song_json2['status'] == 0:
        print('cookies过期或发生其他错误,请重试')
        print('程序将退出')
        # quit() is an interactive helper injected by the site module;
        # raising SystemExit has the same effect and is always available.
        raise SystemExit(1)

    data = song_json2['data']
    # Write the sanitised title back into the response: lyrics() builds the
    # .lrc file name from the same field.
    data['audio_name'] = data['audio_name'].translate(
        str.maketrans({char: ' ' for char in _INVALID_FILE_NAME_CHARS}))
    song_url = data['play_url']
    song_name = data['audio_name']
    song_length = int(data['timelength'])
    song_free = data['is_free_part']  # 1 for preview-only tracks, 0 for normal ones

    if song_url == '':
        # Nothing to download; do not touch the file system.
        _notify('&#10060;歌曲<{}>无数据或需要付费下载'.format(song_name), is_GUI, title='错误', ok_button='好的')
        return None

    notice_file_name = ''
    notice = ''
    if song_free == 1:
        notice = '&#9888;歌曲为试听版,请核实'
        notice_file_name = '[试听]'
    file_path = '音乐/' + notice_file_name + song_name + '.mp3'

    # Check first so an existing file does not cost a download.
    if os.path.exists(file_path):
        _notify('&#9888;歌曲<' + song_name + '>已存在', is_GUI, ok_button='继续')
        return song_json2

    try:
        # Fetch before creating the file so a failed request leaves no empty .mp3.
        song = requests.get(url=song_url, headers=headers, cookies=str_to_dict(cookies))
        song.raise_for_status()
        with open(file_path, 'xb') as f:
            f.write(song.content)
    except FileExistsError:
        # The file appeared between the check above and the open() call.
        _notify('&#9888;歌曲<' + song_name + '>已存在', is_GUI, ok_button='继续')
        return song_json2
    except (requests.RequestException, OSError) as error:
        # Report real failures as failures instead of "already exists".
        _notify('&#10060;歌曲<{}>下载失败: {}'.format(song_name, error), is_GUI, title='错误', ok_button='好的')
        return song_json2

    # timelength is in milliseconds; zero-pad the seconds ("3:05", not "3:5").
    minutes, seconds = divmod(song_length // 1000, 60)
    song_length_format = '{}:{:02d}'.format(minutes, seconds)
    _notify('&#10004;歌曲<{}>下载完成\n歌曲时长{}\n'.format(song_name, song_length_format) + notice, is_GUI,
            title='成功', ok_button='继续')
    return song_json2


def download_name(mode):
    """Search for a song by name, let the user pick a result, and download it.

    Args:
        mode: Quality selector. 1 uses the entry's ``FileHash``, 2
            ``HQFileHash``, 3 ``SQFileHash``, 4 ``ResFileHash``. Any other
            value downloads nothing.

    Returns:
        None. Returns early, without downloading, when a dialog is cancelled
        or the search yields no results.
    """
    name = eg.enterbox(msg='输入歌曲名称')
    if not name:
        # Prompt cancelled (None) or left empty: nothing to search for.
        return
    url_name = parse.quote(name).replace('%20', '+')
    url_json1 = 'https://songsearch.kugou.com/song_search_v2?callback=jQuery11240770641348037286_1566198223730' \
                '&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection' \
                '=1&privilege_filter=0&_=1566198223734'.format(url_name)
    page1 = requests.get(url=url_json1, headers=headers).text
    # The body is JSONP, "callback({...});". Take what lies between the
    # outermost parentheses instead of relying on fixed character offsets.
    song_json = json.loads(page1[page1.find('(') + 1:page1.rfind(')')])
    results = song_json['data']['lists']

    # Titles come back with <em> highlight tags around the matched keyword.
    song_list = [song['FileName'].replace('<em>', '').replace('</em>', '') for song in results]
    if not song_list:
        eg.msgbox(msg='未找到与<{}>相关的歌曲'.format(name), title='错误', ok_button='好的')
        return
    # For duplicate titles the last occurrence wins, as before.
    song_dict = {title: index for index, title in enumerate(song_list)}

    chosen = eg.choicebox(msg='请在以上结果中选择你要下载的歌曲', choices=song_list)
    if chosen is None:
        # Selection cancelled.
        return
    i = song_dict[chosen]
    lyrics_mode = eg.boolbox('是否下载歌词?', choices=['是', '否'])

    hash_keys = {1: 'FileHash', 2: 'HQFileHash', 3: 'SQFileHash', 4: 'ResFileHash'}
    hash_key = hash_keys.get(mode)
    if hash_key is None:
        return
    lyrics(download_hash(results[i][hash_key], True), lyrics_mode)


def download_list():
    """Download every song whose hash is listed in '数据/歌单哈希值列表.txt'.

    The file is split on whitespace, so one hash per line works. The user is
    asked once whether lyrics should be saved, and that answer applies to
    every song. There is a one-second pause after each song.
    """
    with open('数据/歌单哈希值列表.txt', 'r') as hash_file:
        file_text = hash_file.read()
    hashes = file_text.split()

    want_lyrics = eg.boolbox(msg='是否需要一键下载全部歌词?', choices=['是', '否'])

    for song_hash in hashes:
        song_info = download_hash(song_hash, False)
        lyrics(song_info, want_lyrics)
        # Pause between songs.
        time.sleep(1)


def lyrics(json_list, mode):
    """Save the lyrics from a song response as '音乐/<audio_name>.lrc'.

    Args:
        json_list: Parsed song response (dict with a ``data`` entry holding
            ``lyrics`` and ``audio_name``), or ``None``.
        mode: Truthy to write the lyric file; falsy to skip writing.

    Nothing is written for instrumental tracks, for ``None``, or when the
    lyrics are empty; a message is printed instead.
    """
    # The marker is searched in the text form of the whole response.
    if '纯音乐,请欣赏' in str(json_list):
        print('&#10004;已检测到纯音乐,不需要歌词')
        return

    if json_list is None or json_list['data']['lyrics'] == '':
        print('&#10060;此歌曲无歌词')
        return

    if not mode:
        return

    lrc_path = '音乐/' + json_list['data']['audio_name'] + '.lrc'
    with open(lrc_path, 'w', encoding='gb18030') as lrc_file:
        text = json_list['data']['lyrics']
        # Remove line feeds, the byte-order mark and the id tag everywhere...
        for unwanted in ('\n', '\ufeff', '[id:$00000000]'):
            text = text.replace(unwanted, '')
        # ...but only the first carriage return.
        text = text.replace('\r', '', 1)
        lrc_file.write(text)
    print('歌词下载完成\n')
            print('歌词下载完成\n')



第三个文件(歌单获取功能)
能通过酷狗音乐分享到QQ空间的链接获取歌单
[Python] 纯文本查看 复制代码
import  easygui
from lxml import etree
import requests
from ast import literal_eval

def get_song_list():
    """Fetch a shared playlist page and store its song names and hashes.

    Asks for the share link, downloads the page, and extracts the song array
    embedded in the page's second ``<script>`` element. Song names go to
    '数据/歌单列表.txt' and hashes to '数据/歌单哈希值列表.txt', one per line;
    the names are also printed.

    Returns:
        None. Returns without touching the files when the link prompt is
        cancelled or left blank.

    Raises:
        ValueError: When the page has no playlist data in the expected place.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3872.0 Safari/537.36 Edg/78.0.244.0'}
    url = easygui.enterbox('请输入分享歌单的链接','输入连接')
    if not url or not url.strip():
        # Prompt cancelled or left empty: keep the existing list files.
        return
    page = requests.get(url=url.strip(), headers=headers).text
    html = etree.HTML(page)

    scripts = html.xpath('//script[2]')
    script_text = scripts[0].text if scripts else None
    end = script_text.find('],//当前页面歌曲信息') if script_text else -1
    if end == -1:
        # Without this check a missing marker silently sliced off the last
        # character and failed later with an unrelated parse error.
        raise ValueError('无法在页面中找到歌单数据,请检查链接是否正确')
    # The first 31 characters are skipped; presumably the JavaScript that
    # precedes the song data. TODO confirm against a live page.
    song_list = literal_eval(script_text[31:end])
    if isinstance(song_list, dict):
        # A single-song payload evaluates to one dict, not a sequence of them.
        song_list = [song_list]

    with open("数据/歌单列表.txt", "w", encoding="utf-8") as f, \
            open("数据/歌单哈希值列表.txt", "w") as d:
        print('歌单列表获取完成\n&#128071;以下是列表中的歌曲&#128071;')
        for song in song_list:
            song_name = song['audio_name']
            song_hash = song['hash']
            f.write(song_name + '\n')
            d.write(song_hash + '\n')
            print(song_name)


第四个文件(歌词编码转换)
该功能可以把酷狗客户端下载的歌词文件从UTF-8转换为GBK兼容编码(代码中实际写入的是gb18030),避免在MP3等设备上显示乱码(不使用酷狗客户端的话基本用不到)
[Python] 纯文本查看 复制代码
def utf8_to_gbk():
    """Re-encode a lyric file in place from UTF-8 to gb18030.

    The path is read from standard input, and double quotes are removed from
    it. The decoded text is printed, then written back to the same file in
    gb18030 with any byte-order-mark characters removed.
    """
    typed_path = input('请输入lrc歌词文件名')
    lrc_path = typed_path.replace('"', '')

    with open(lrc_path, mode='r', encoding='utf-8') as reader:
        text = reader.read()
    print(text)

    without_bom = text.replace('\ufeff', '')
    with open(lrc_path, mode='w', encoding='gb18030') as writer:
        writer.write(without_bom)


效果展示

image.png (38.33 KB, 下载次数: 0)

下载附件  保存到相册

功能选择

2020-2-22 12:40 上传


image.png (45.2 KB, 下载次数: 0)

下载附件  保存到相册

下载界面

2020-2-22 12:41 上传



使用教程
1.配置好python3环境
2.下载并解压
3.点击“点击启动.bat”

歌单下载功能:在酷狗客户端右键歌单,依次点击"分享"、"QQ空间",会出现一个以t*.kugou.com开头的链接,将其复制到程序中即可
歌名搜索:直接输入歌名即可
哈希值下载:在酷狗客户端右键歌曲,点击"歌曲信息",复制其中的哈希值即可(可精准下载某些需要VIP才能下载的歌曲)
导入文件:填写哈希值文件列表批量下载,一行一个哈希值
歌词格式转换:复制歌词.lrc绝对路径输入即可

版权声明

本文来源互联网收集,如有版权问题请联系站长,谢谢。

本文链接:https://www.piaodoo.com/8119.html

评论