lb_toolkits.download.downloadGithub 源代码

# -*- coding:utf-8 -*-
'''
@Project     : lb_toolkits

@File        : downloadGithub.py

@Modify Time :  2023/3/13 15:18   

@Author      : Lee    

@Version     : 1.0   

@Description :

'''
import json
import os
import requests
from urllib import parse

from lb_toolkits import parm

# Absolute path of the first entry in the ``lb_toolkits.parm`` package's
# ``__path__`` (i.e. the directory the ``parm`` package is loaded from).
# NOTE(review): not referenced by the code visible in this module — confirm
# whether other modules import it before removing.
exedir = os.path.abspath(list(parm.__path__)[0])
from lb_toolkits.utils import spiderdownload

class downloadGithub:
    """List and download files that live in a GitHub repository.

    Listings are obtained by requesting GitHub ``tree`` pages and reading the
    JSON ``payload`` found in the response; downloads are delegated to
    ``spiderdownload`` using the matching ``raw.githubusercontent.com`` URL.
    """

    def __init__(self, username=None):
        """Store the GitHub account name used to build repository URLs.

        :param username: GitHub user/organisation name. May be left as
            ``None`` when ``searfile`` is always given an explicit ``giturl``.
        """
        self.username = username

    def searfile(self, giturl=None, repositories=None, srcdir=None, branches='master'):
        """Return the URLs of the files found below a repository directory.

        Either pass a complete ``giturl``, or let it be built from the
        instance ``username`` together with ``repositories`` and ``srcdir``.

        :param giturl: full GitHub ``tree`` URL to crawl; takes precedence
            over the other arguments when given.
        :param repositories: repository name (used only when ``giturl`` is
            ``None``).
        :param srcdir: directory inside the repository (used only when
            ``giturl`` is ``None``).
        :param branches: branch name inserted into the built URL.
        :return: the list produced by ``GetGitUrl`` for the resolved URL.
        :raises Exception: if ``giturl`` is ``None`` and any of ``username``,
            ``repositories`` or ``srcdir`` is missing.
        """
        if giturl is None:
            if self.username is None or repositories is None or srcdir is None:
                raise Exception('请设置【giturl】或者【username, repositories, srcdir】参数')
            giturl = 'https://github.com/{username}/{repositories}/tree/{branches}/{srcdir}'.format(
                username=self.username,
                repositories=repositories,
                srcdir=srcdir,
                branches=branches)
        return self.GetGitUrl(giturl)

    @staticmethod
    def _to_raw_url(srcurl):
        """Map a github.com ``blob``/``tree`` URL to its raw-content URL.

        Only the ``blob``/``tree`` marker that directly follows
        ``<user>/<repo>`` is dropped, so file or directory names that merely
        contain those words (``/blobs/``, ``/treemap/`` ...) are left intact.
        URLs that do not contain ``github.com/`` are returned unchanged.
        """
        head, sep, tail = srcurl.partition('github.com/')
        if not sep:
            return srcurl
        # tail looks like: <user>/<repo>/<blob|tree>/<branch>/<path...>
        segments = tail.split('/')
        if len(segments) > 2 and segments[2] in ('blob', 'tree'):
            del segments[2]
        return head + 'raw.githubusercontent.com/' + '/'.join(segments)

    def download(self, dstdir, srcurl):
        """Download one repository file into ``dstdir``.

        :param dstdir: destination directory handed to ``spiderdownload``.
        :param srcurl: github.com URL of the file (``blob`` or ``tree`` form).
        :return: whatever ``spiderdownload().download`` returns for the
            percent-decoded raw-content URL.
        """
        spider = spiderdownload()
        rawurl = parse.unquote(self._to_raw_url(srcurl))
        return spider.download(dstdir, rawurl)

    def GetGitUrl(self, giturl):
        """Crawl ``giturl`` and collect the file links it leads to.

        :param giturl: GitHub URL to request.
        :return: ``[]`` when the response has no ``payload``; ``[giturl]``
            when the payload contains ``fileTree``; otherwise the URLs of all
            ``file`` items of ``payload['tree']['items']``, descending
            recursively into ``directory`` items.
        :raises ValueError: from ``json.loads`` when the response text
            mentions ``payload`` but is not valid JSON.
        """
        # The context manager closes the session on every path, including the
        # early returns below.
        with requests.Session() as session:
            text = session.get(giturl).text

        if 'payload' not in text:
            return []
        resjson = json.loads(text)
        if 'payload' not in resjson:
            return []
        payload = resjson['payload']

        # A payload carrying 'fileTree' always resolves to the URL itself, so
        # answer before crawling 'tree' (whose result would be discarded).
        if 'fileTree' in payload:
            return [giturl]

        matchurl = []
        if 'tree' in payload:
            for item in payload['tree']['items']:
                kind = item['contentType']
                if kind == 'directory':
                    matchurl.extend(self.GetGitUrl(giturl + '/' + item['name']))
                elif kind == 'file':
                    matchurl.append(giturl + '/' + item['name'])
        return matchurl