2020-02-03 16:18:21 +08:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
import asyncio
|
|
|
|
|
import datetime
|
2023-07-05 00:02:28 +08:00
|
|
|
|
import hashlib
|
2020-02-03 16:18:21 +08:00
|
|
|
|
import logging
|
2020-02-04 16:12:16 +08:00
|
|
|
|
import re
|
2023-07-05 00:02:28 +08:00
|
|
|
|
import urllib.parse
|
2020-02-03 16:18:21 +08:00
|
|
|
|
from typing import *
|
|
|
|
|
|
|
|
|
|
import aiohttp
|
|
|
|
|
import sqlalchemy.exc
|
|
|
|
|
|
2021-04-20 21:30:57 +08:00
|
|
|
|
import config
|
2022-02-15 00:18:46 +08:00
|
|
|
|
import models.bilibili as bl_models
|
2020-02-03 16:18:21 +08:00
|
|
|
|
import models.database
|
2022-02-15 00:18:46 +08:00
|
|
|
|
import utils.request
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DEFAULT_AVATAR_URL = '//static.hdslb.com/images/member/noface.gif'
|
|
|
|
|
|
|
|
|
|
# user_id -> avatar_url
|
|
|
|
|
_avatar_url_cache: Dict[int, str] = {}
|
2020-02-05 12:58:26 +08:00
|
|
|
|
# 正在获取头像的Future,user_id -> Future
|
|
|
|
|
_uid_fetch_future_map: Dict[int, asyncio.Future] = {}
|
|
|
|
|
# 正在获取头像的user_id队列
|
2021-07-17 13:03:45 +08:00
|
|
|
|
_uid_queue_to_fetch: Optional[asyncio.Queue] = None
|
2020-02-05 12:58:26 +08:00
|
|
|
|
# 上次被B站ban时间
|
|
|
|
|
_last_fetch_banned_time: Optional[datetime.datetime] = None
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
2023-07-05 00:02:28 +08:00
|
|
|
|
# wbi密码表
|
|
|
|
|
WBI_KEY_INDEX_TABLE = [
|
|
|
|
|
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35,
|
|
|
|
|
27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13
|
|
|
|
|
]
|
|
|
|
|
# wbi鉴权口令
|
|
|
|
|
_wbi_key = ''
|
|
|
|
|
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
def init():
|
2021-04-20 21:30:57 +08:00
|
|
|
|
cfg = config.get_config()
|
|
|
|
|
global _uid_queue_to_fetch
|
|
|
|
|
_uid_queue_to_fetch = asyncio.Queue(cfg.fetch_avatar_max_queue_size)
|
2023-07-29 12:48:57 +08:00
|
|
|
|
asyncio.get_event_loop().create_task(_get_avatar_url_from_web_consumer())
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_avatar_url(user_id):
|
2020-09-13 21:24:20 +08:00
|
|
|
|
avatar_url = await get_avatar_url_or_none(user_id)
|
|
|
|
|
if avatar_url is None:
|
|
|
|
|
avatar_url = DEFAULT_AVATAR_URL
|
|
|
|
|
return avatar_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_avatar_url_or_none(user_id):
|
2023-07-06 23:25:07 +08:00
|
|
|
|
if user_id == 0:
|
|
|
|
|
return None
|
|
|
|
|
|
2020-02-03 16:18:21 +08:00
|
|
|
|
avatar_url = get_avatar_url_from_memory(user_id)
|
|
|
|
|
if avatar_url is not None:
|
|
|
|
|
return avatar_url
|
|
|
|
|
avatar_url = await get_avatar_url_from_database(user_id)
|
|
|
|
|
if avatar_url is not None:
|
|
|
|
|
return avatar_url
|
2020-09-13 21:24:20 +08:00
|
|
|
|
return await get_avatar_url_from_web(user_id)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_avatar_url_from_memory(user_id):
|
|
|
|
|
return _avatar_url_cache.get(user_id, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_avatar_url_from_database(user_id) -> Awaitable[Optional[str]]:
|
2023-07-29 12:48:57 +08:00
|
|
|
|
loop = asyncio.get_running_loop()
|
|
|
|
|
return loop.run_in_executor(None, _do_get_avatar_url_from_database, user_id, loop)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
2023-07-29 12:48:57 +08:00
|
|
|
|
def _do_get_avatar_url_from_database(user_id, loop: asyncio.AbstractEventLoop):
|
2020-02-03 16:18:21 +08:00
|
|
|
|
try:
|
|
|
|
|
with models.database.get_session() as session:
|
2023-07-29 01:07:04 +08:00
|
|
|
|
user = session.scalars(
|
|
|
|
|
sqlalchemy.select(bl_models.BilibiliUser).filter(
|
|
|
|
|
bl_models.BilibiliUser.uid == user_id
|
|
|
|
|
)
|
2022-02-15 00:18:46 +08:00
|
|
|
|
).one_or_none()
|
2020-02-03 16:18:21 +08:00
|
|
|
|
if user is None:
|
|
|
|
|
return None
|
|
|
|
|
avatar_url = user.avatar_url
|
|
|
|
|
|
|
|
|
|
# 如果离上次更新太久就更新所有缓存
|
|
|
|
|
if (datetime.datetime.now() - user.update_time).days >= 3:
|
|
|
|
|
def refresh_cache():
|
2020-02-05 12:58:26 +08:00
|
|
|
|
_avatar_url_cache.pop(user_id, None)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
get_avatar_url_from_web(user_id)
|
|
|
|
|
|
2023-07-29 12:48:57 +08:00
|
|
|
|
loop.call_soon_threadsafe(refresh_cache)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
else:
|
|
|
|
|
# 否则只更新内存缓存
|
|
|
|
|
_update_avatar_cache_in_memory(user_id, avatar_url)
|
2020-02-03 19:29:29 +08:00
|
|
|
|
except sqlalchemy.exc.OperationalError:
|
|
|
|
|
# SQLite会锁整个文件,忽略就行
|
|
|
|
|
return None
|
2020-02-03 16:18:21 +08:00
|
|
|
|
except sqlalchemy.exc.SQLAlchemyError:
|
2020-02-03 19:29:29 +08:00
|
|
|
|
logger.exception('_do_get_avatar_url_from_database failed:')
|
2020-02-03 16:18:21 +08:00
|
|
|
|
return None
|
|
|
|
|
return avatar_url
|
|
|
|
|
|
|
|
|
|
|
2020-02-04 16:12:16 +08:00
|
|
|
|
def get_avatar_url_from_web(user_id) -> Awaitable[Optional[str]]:
|
2020-02-05 12:58:26 +08:00
|
|
|
|
# 如果已有正在获取的future则返回,防止重复获取同一个uid
|
|
|
|
|
future = _uid_fetch_future_map.get(user_id, None)
|
|
|
|
|
if future is not None:
|
|
|
|
|
return future
|
|
|
|
|
# 否则创建一个获取任务
|
2023-07-29 12:48:57 +08:00
|
|
|
|
_uid_fetch_future_map[user_id] = future = asyncio.get_running_loop().create_future()
|
2020-02-05 12:58:26 +08:00
|
|
|
|
future.add_done_callback(lambda _future: _uid_fetch_future_map.pop(user_id, None))
|
2020-02-03 16:18:21 +08:00
|
|
|
|
try:
|
2020-02-05 12:58:26 +08:00
|
|
|
|
_uid_queue_to_fetch.put_nowait(user_id)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
except asyncio.QueueFull:
|
2020-02-04 16:12:16 +08:00
|
|
|
|
future.set_result(None)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
return future
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_avatar_url_from_web_consumer():
|
|
|
|
|
while True:
|
|
|
|
|
try:
|
2020-02-05 12:58:26 +08:00
|
|
|
|
user_id = await _uid_queue_to_fetch.get()
|
|
|
|
|
future = _uid_fetch_future_map.get(user_id, None)
|
|
|
|
|
if future is None:
|
2020-02-03 16:18:21 +08:00
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# 防止在被ban的时候获取
|
2020-02-05 12:58:26 +08:00
|
|
|
|
global _last_fetch_banned_time
|
|
|
|
|
if _last_fetch_banned_time is not None:
|
2020-02-03 16:18:21 +08:00
|
|
|
|
cur_time = datetime.datetime.now()
|
2020-02-05 12:58:26 +08:00
|
|
|
|
if (cur_time - _last_fetch_banned_time).total_seconds() < 3 * 60 + 3:
|
2020-02-04 16:12:16 +08:00
|
|
|
|
# 3分钟以内被ban,解封大约要15分钟
|
|
|
|
|
future.set_result(None)
|
2020-02-03 19:29:29 +08:00
|
|
|
|
continue
|
2020-02-03 16:18:21 +08:00
|
|
|
|
else:
|
2020-02-05 12:58:26 +08:00
|
|
|
|
_last_fetch_banned_time = None
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
2023-07-29 12:48:57 +08:00
|
|
|
|
asyncio.create_task(_get_avatar_url_from_web_coroutine(user_id, future))
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
# 限制频率,防止被B站ban
|
2021-04-20 21:30:57 +08:00
|
|
|
|
cfg = config.get_config()
|
|
|
|
|
await asyncio.sleep(cfg.fetch_avatar_interval)
|
2022-02-15 00:18:46 +08:00
|
|
|
|
except Exception: # noqa
|
2020-02-04 16:12:16 +08:00
|
|
|
|
logger.exception('_get_avatar_url_from_web_consumer error:')
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_avatar_url_from_web_coroutine(user_id, future):
|
|
|
|
|
try:
|
|
|
|
|
avatar_url = await _do_get_avatar_url_from_web(user_id)
|
|
|
|
|
except BaseException as e:
|
|
|
|
|
future.set_exception(e)
|
2020-02-05 12:58:26 +08:00
|
|
|
|
else:
|
|
|
|
|
future.set_result(avatar_url)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _do_get_avatar_url_from_web(user_id):
|
2023-07-05 00:02:28 +08:00
|
|
|
|
global _wbi_key
|
|
|
|
|
if _wbi_key == '':
|
|
|
|
|
# TODO 判断一下是否正在获取
|
|
|
|
|
_wbi_key = await _get_wbi_key()
|
|
|
|
|
|
2020-02-03 16:18:21 +08:00
|
|
|
|
try:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
async with utils.request.http_session.get(
|
2023-07-05 00:02:28 +08:00
|
|
|
|
'https://api.bilibili.com/x/space/wbi/acc/info',
|
2022-06-18 21:31:25 +08:00
|
|
|
|
headers={
|
2022-09-28 00:11:29 +08:00
|
|
|
|
**utils.request.BILIBILI_COMMON_HEADERS,
|
|
|
|
|
'Origin': 'https://space.bilibili.com',
|
|
|
|
|
'Referer': f'https://space.bilibili.com/{user_id}/'
|
2022-06-18 21:31:25 +08:00
|
|
|
|
},
|
2023-07-05 00:02:28 +08:00
|
|
|
|
params=_add_wbi_sign({'mid': user_id}),
|
2022-02-15 00:18:46 +08:00
|
|
|
|
) as r:
|
2020-02-03 16:18:21 +08:00
|
|
|
|
if r.status != 200:
|
|
|
|
|
logger.warning('Failed to fetch avatar: status=%d %s uid=%d', r.status, r.reason, user_id)
|
2020-02-03 19:29:29 +08:00
|
|
|
|
if r.status == 412:
|
|
|
|
|
# 被B站ban了
|
2020-02-05 12:58:26 +08:00
|
|
|
|
global _last_fetch_banned_time
|
|
|
|
|
_last_fetch_banned_time = datetime.datetime.now()
|
2020-02-04 16:12:16 +08:00
|
|
|
|
return None
|
2020-02-03 16:18:21 +08:00
|
|
|
|
data = await r.json()
|
|
|
|
|
except (aiohttp.ClientConnectionError, asyncio.TimeoutError):
|
2020-02-04 16:12:16 +08:00
|
|
|
|
return None
|
|
|
|
|
|
2022-09-28 00:11:29 +08:00
|
|
|
|
if data['code'] != 0:
|
|
|
|
|
# 这里虽然失败但不会被ban一段时间
|
|
|
|
|
logger.info('Failed to fetch avatar: code=%d %s uid=%d', data['code'], data['message'], user_id)
|
2023-07-05 00:02:28 +08:00
|
|
|
|
if data['code'] == -403:
|
|
|
|
|
_wbi_key = ''
|
2022-09-28 00:11:29 +08:00
|
|
|
|
return None
|
|
|
|
|
|
2020-02-04 16:12:16 +08:00
|
|
|
|
avatar_url = process_avatar_url(data['data']['face'])
|
|
|
|
|
update_avatar_cache(user_id, avatar_url)
|
|
|
|
|
return avatar_url
|
|
|
|
|
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
2023-07-05 00:02:28 +08:00
|
|
|
|
async def _get_wbi_key():
|
|
|
|
|
try:
|
|
|
|
|
async with utils.request.http_session.get(
|
|
|
|
|
'https://api.bilibili.com/nav',
|
|
|
|
|
headers=utils.request.BILIBILI_COMMON_HEADERS,
|
|
|
|
|
) as r:
|
|
|
|
|
if r.status != 200:
|
|
|
|
|
logger.warning('Failed to get wbi key: status=%d %s', r.status, r.reason)
|
|
|
|
|
return ''
|
|
|
|
|
data = await r.json()
|
|
|
|
|
except (aiohttp.ClientConnectionError, asyncio.TimeoutError):
|
|
|
|
|
logger.exception('Failed to get wbi key:')
|
|
|
|
|
return ''
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
wbi_img = data['data']['wbi_img']
|
|
|
|
|
img_key = wbi_img['img_url'].rpartition('/')[2].partition('.')[0]
|
|
|
|
|
sub_key = wbi_img['sub_url'].rpartition('/')[2].partition('.')[0]
|
|
|
|
|
except KeyError:
|
|
|
|
|
logger.warning('Failed to get wbi key: data=%s', data)
|
|
|
|
|
return ''
|
|
|
|
|
|
|
|
|
|
shuffled_key = img_key + sub_key
|
|
|
|
|
wbi_key = []
|
|
|
|
|
for index in WBI_KEY_INDEX_TABLE:
|
|
|
|
|
if index < len(shuffled_key):
|
|
|
|
|
wbi_key.append(shuffled_key[index])
|
|
|
|
|
return ''.join(wbi_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _add_wbi_sign(params: dict):
|
|
|
|
|
if _wbi_key == '':
|
|
|
|
|
return params
|
|
|
|
|
|
|
|
|
|
wts = str(int(datetime.datetime.now().timestamp()))
|
|
|
|
|
params_to_sign = {**params, 'wts': wts}
|
|
|
|
|
|
|
|
|
|
# 按key字典序排序
|
|
|
|
|
params_to_sign = {
|
|
|
|
|
key: params_to_sign[key]
|
|
|
|
|
for key in sorted(params_to_sign.keys())
|
|
|
|
|
}
|
|
|
|
|
# 过滤一些字符
|
|
|
|
|
for key, value in params_to_sign.items():
|
|
|
|
|
value = ''.join(
|
|
|
|
|
ch
|
|
|
|
|
for ch in str(value)
|
|
|
|
|
if ch not in "!'()*"
|
|
|
|
|
)
|
|
|
|
|
params_to_sign[key] = value
|
|
|
|
|
|
|
|
|
|
str_to_sign = urllib.parse.urlencode(params_to_sign) + _wbi_key
|
|
|
|
|
w_rid = hashlib.md5(str_to_sign.encode('utf-8')).hexdigest()
|
|
|
|
|
return {
|
|
|
|
|
**params,
|
|
|
|
|
'wts': wts,
|
|
|
|
|
'w_rid': w_rid
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-02-04 16:12:16 +08:00
|
|
|
|
def process_avatar_url(avatar_url):
|
|
|
|
|
# 去掉协议,兼容HTTP、HTTPS
|
|
|
|
|
m = re.fullmatch(r'(?:https?:)?(.*)', avatar_url)
|
|
|
|
|
if m is not None:
|
|
|
|
|
avatar_url = m[1]
|
|
|
|
|
# 缩小图片加快传输
|
2020-02-03 16:18:21 +08:00
|
|
|
|
if not avatar_url.endswith('noface.gif'):
|
|
|
|
|
avatar_url += '@48w_48h'
|
|
|
|
|
return avatar_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_avatar_cache(user_id, avatar_url):
|
|
|
|
|
_update_avatar_cache_in_memory(user_id, avatar_url)
|
2023-07-29 12:48:57 +08:00
|
|
|
|
asyncio.get_running_loop().run_in_executor(
|
2020-02-03 16:18:21 +08:00
|
|
|
|
None, _update_avatar_cache_in_database, user_id, avatar_url
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _update_avatar_cache_in_memory(user_id, avatar_url):
|
|
|
|
|
_avatar_url_cache[user_id] = avatar_url
|
2021-04-20 21:30:57 +08:00
|
|
|
|
cfg = config.get_config()
|
|
|
|
|
while len(_avatar_url_cache) > cfg.avatar_cache_size:
|
2020-03-21 15:23:31 +08:00
|
|
|
|
_avatar_url_cache.pop(next(iter(_avatar_url_cache)), None)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _update_avatar_cache_in_database(user_id, avatar_url):
|
|
|
|
|
try:
|
|
|
|
|
with models.database.get_session() as session:
|
2023-07-29 01:07:04 +08:00
|
|
|
|
user = session.scalars(
|
|
|
|
|
sqlalchemy.select(bl_models.BilibiliUser).filter(
|
|
|
|
|
bl_models.BilibiliUser.uid == user_id
|
|
|
|
|
)
|
2022-02-15 00:18:46 +08:00
|
|
|
|
).one_or_none()
|
2020-02-03 16:18:21 +08:00
|
|
|
|
if user is None:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
user = bl_models.BilibiliUser(
|
|
|
|
|
uid=user_id
|
|
|
|
|
)
|
2020-02-03 16:18:21 +08:00
|
|
|
|
session.add(user)
|
2022-02-15 00:18:46 +08:00
|
|
|
|
user.avatar_url = avatar_url
|
|
|
|
|
user.update_time = datetime.datetime.now()
|
2020-02-03 16:18:21 +08:00
|
|
|
|
session.commit()
|
2020-02-03 19:29:29 +08:00
|
|
|
|
except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.IntegrityError):
|
|
|
|
|
# SQLite会锁整个文件,忽略就行,另外还有多线程导致ID重复的问题
|
|
|
|
|
pass
|
2020-02-03 16:18:21 +08:00
|
|
|
|
except sqlalchemy.exc.SQLAlchemyError:
|
|
|
|
|
logger.exception('_update_avatar_cache_in_database failed:')
|