2020-02-06 12:48:20 +08:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
import asyncio
|
2021-07-18 14:53:36 +08:00
|
|
|
|
import base64
|
2023-08-05 21:25:59 +08:00
|
|
|
|
import dataclasses
|
2021-04-11 22:11:13 +08:00
|
|
|
|
import datetime
|
2023-08-05 21:25:59 +08:00
|
|
|
|
import enum
|
2020-02-06 12:48:20 +08:00
|
|
|
|
import functools
|
2021-04-11 22:11:13 +08:00
|
|
|
|
import hashlib
|
|
|
|
|
import hmac
|
|
|
|
|
import json
|
2020-02-06 12:48:20 +08:00
|
|
|
|
import logging
|
2021-04-23 21:35:57 +08:00
|
|
|
|
import random
|
2020-02-06 12:48:20 +08:00
|
|
|
|
import re
|
|
|
|
|
from typing import *
|
|
|
|
|
|
2022-02-15 00:18:46 +08:00
|
|
|
|
import Crypto.Cipher.AES as cry_aes # noqa
|
|
|
|
|
import Crypto.Util.Padding as cry_pad # noqa
|
2020-02-06 12:48:20 +08:00
|
|
|
|
import aiohttp
|
2023-07-30 22:22:47 +08:00
|
|
|
|
import cachetools
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2021-04-11 14:16:32 +08:00
|
|
|
|
import config
|
2022-02-15 00:18:46 +08:00
|
|
|
|
import utils.request
|
2021-04-11 14:16:32 +08:00
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
NO_TRANSLATE_TEXTS = {
|
|
|
|
|
'草', '草草', '草草草', '草生', '大草原', '上手', '上手上手', '理解', '理解理解', '天才', '天才天才',
|
2020-08-16 12:15:01 +08:00
|
|
|
|
'强', '余裕', '余裕余裕', '大丈夫', '再放送', '放送事故', '清楚', '清楚清楚'
|
2020-02-06 12:48:20 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_translate_providers: List['TranslateProvider'] = []
|
|
|
|
|
# text -> res
|
2023-07-30 22:22:47 +08:00
|
|
|
|
_translate_cache: Optional[cachetools.LRUCache] = None
|
2020-02-06 12:48:20 +08:00
|
|
|
|
# 正在翻译的Future,text -> Future
|
|
|
|
|
_text_future_map: Dict[str, asyncio.Future] = {}
|
2023-08-05 21:25:59 +08:00
|
|
|
|
# 正在翻译的任务队列,索引是优先级
|
|
|
|
|
_task_queues: List['asyncio.Queue[TranslateTask]'] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Priority(enum.IntEnum):
|
|
|
|
|
HIGH = 0
|
|
|
|
|
NORMAL = 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclasses.dataclass
|
|
|
|
|
class TranslateTask:
|
|
|
|
|
priority: Priority
|
|
|
|
|
text: str
|
|
|
|
|
future: 'asyncio.Future[Optional[str]]'
|
|
|
|
|
remain_retry_count: int
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def init():
|
2023-07-30 22:22:47 +08:00
|
|
|
|
cfg = config.get_config()
|
2023-08-05 21:25:59 +08:00
|
|
|
|
global _translate_cache, _task_queues
|
2023-07-30 22:22:47 +08:00
|
|
|
|
_translate_cache = cachetools.LRUCache(cfg.translation_cache_size)
|
2023-08-05 21:25:59 +08:00
|
|
|
|
# 总队列长度会超过translate_max_queue_size,不用这么严格
|
|
|
|
|
_task_queues = [asyncio.Queue(cfg.translate_max_queue_size) for _ in range(len(Priority))]
|
2023-07-29 12:48:57 +08:00
|
|
|
|
asyncio.get_event_loop().create_task(_do_init())
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _do_init():
|
2021-04-11 14:16:32 +08:00
|
|
|
|
cfg = config.get_config()
|
2021-04-11 22:11:13 +08:00
|
|
|
|
if not cfg.enable_translate:
|
|
|
|
|
return
|
2021-04-11 14:16:32 +08:00
|
|
|
|
providers = []
|
|
|
|
|
for trans_cfg in cfg.translator_configs:
|
2021-04-11 22:11:13 +08:00
|
|
|
|
provider = create_translate_provider(trans_cfg)
|
2021-04-11 14:16:32 +08:00
|
|
|
|
if provider is not None:
|
|
|
|
|
providers.append(provider)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
await asyncio.gather(*(provider.init() for provider in providers))
|
|
|
|
|
global _translate_providers
|
|
|
|
|
_translate_providers = providers
|
|
|
|
|
|
|
|
|
|
|
2021-04-11 22:11:13 +08:00
|
|
|
|
def create_translate_provider(cfg):
|
|
|
|
|
type_ = cfg['type']
|
|
|
|
|
if type_ == 'TencentTranslateFree':
|
|
|
|
|
return TencentTranslateFree(
|
2023-08-05 21:25:59 +08:00
|
|
|
|
cfg['query_interval'], cfg['source_language'], cfg['target_language']
|
2021-04-11 22:11:13 +08:00
|
|
|
|
)
|
|
|
|
|
elif type_ == 'TencentTranslate':
|
|
|
|
|
return TencentTranslate(
|
2023-08-05 21:25:59 +08:00
|
|
|
|
cfg['query_interval'], cfg['source_language'], cfg['target_language'],
|
|
|
|
|
cfg['secret_id'], cfg['secret_key'], cfg['region']
|
2021-04-11 22:11:13 +08:00
|
|
|
|
)
|
2021-04-23 21:35:57 +08:00
|
|
|
|
elif type_ == 'BaiduTranslate':
|
|
|
|
|
return BaiduTranslate(
|
2023-08-05 21:25:59 +08:00
|
|
|
|
cfg['query_interval'], cfg['source_language'], cfg['target_language'],
|
|
|
|
|
cfg['app_id'], cfg['secret']
|
2021-04-23 21:35:57 +08:00
|
|
|
|
)
|
2021-04-11 22:11:13 +08:00
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
def need_translate(text):
|
|
|
|
|
text = text.strip()
|
|
|
|
|
# 没有中文,平时打不出的字不管
|
|
|
|
|
if not any(0x4E00 <= ord(c) <= 0x9FFF for c in text):
|
|
|
|
|
return False
|
|
|
|
|
# 含有日文假名
|
|
|
|
|
if any(0x3040 <= ord(c) <= 0x30FF for c in text):
|
|
|
|
|
return False
|
|
|
|
|
# 弹幕同传
|
2021-01-28 23:50:12 +08:00
|
|
|
|
if '【' in text:
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return False
|
|
|
|
|
# 中日双语
|
|
|
|
|
if text in NO_TRANSLATE_TEXTS:
|
|
|
|
|
return False
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_translation_from_cache(text):
|
|
|
|
|
key = text.strip().lower()
|
|
|
|
|
return _translate_cache.get(key, None)
|
|
|
|
|
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
def translate(text, priority=Priority.NORMAL) -> Awaitable[Optional[str]]:
|
2020-02-06 12:48:20 +08:00
|
|
|
|
key = text.strip().lower()
|
|
|
|
|
# 如果已有正在翻译的future则返回,防止重复翻译
|
|
|
|
|
future = _text_future_map.get(key, None)
|
|
|
|
|
if future is not None:
|
|
|
|
|
return future
|
|
|
|
|
# 否则创建一个翻译任务
|
2023-07-29 12:48:57 +08:00
|
|
|
|
future = asyncio.get_running_loop().create_future()
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
|
|
|
|
# 查缓存
|
|
|
|
|
res = _translate_cache.get(key, None)
|
|
|
|
|
if res is not None:
|
|
|
|
|
future.set_result(res)
|
|
|
|
|
return future
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
task = TranslateTask(
|
|
|
|
|
priority=priority,
|
|
|
|
|
text=text,
|
|
|
|
|
future=future,
|
|
|
|
|
remain_retry_count=3 if priority == Priority.HIGH else 1
|
|
|
|
|
)
|
|
|
|
|
if not _push_task(task):
|
2021-04-20 22:09:47 +08:00
|
|
|
|
future.set_result(None)
|
|
|
|
|
return future
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2021-04-20 22:09:47 +08:00
|
|
|
|
_text_future_map[key] = future
|
|
|
|
|
future.add_done_callback(functools.partial(_on_translate_done, key))
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return future
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _on_translate_done(key, future):
|
|
|
|
|
_text_future_map.pop(key, None)
|
|
|
|
|
# 缓存
|
|
|
|
|
try:
|
|
|
|
|
res = future.result()
|
2022-02-15 00:18:46 +08:00
|
|
|
|
except Exception: # noqa
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return
|
|
|
|
|
if res is None:
|
|
|
|
|
return
|
|
|
|
|
_translate_cache[key] = res
|
|
|
|
|
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
def _push_task(task: TranslateTask):
|
|
|
|
|
if not _has_available_translate_provider():
|
|
|
|
|
return False
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
queue = _task_queues[task.priority]
|
|
|
|
|
if not queue.full():
|
|
|
|
|
queue.put_nowait(task)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return True
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
if task.priority != Priority.HIGH:
|
|
|
|
|
return False
|
2021-04-20 22:09:47 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
# 高优先级的尝试降级,挤掉低优先级的任务
|
|
|
|
|
queue = _task_queues[Priority.NORMAL]
|
|
|
|
|
if queue.full():
|
|
|
|
|
lower_task = queue.get_nowait()
|
|
|
|
|
lower_task.future.set_result(None)
|
|
|
|
|
queue.put_nowait(task)
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _pop_task() -> TranslateTask:
|
|
|
|
|
# 按优先级遍历,看是否已经有任务
|
|
|
|
|
for queue in _task_queues:
|
|
|
|
|
if not queue.empty():
|
|
|
|
|
return queue.get_nowait()
|
|
|
|
|
|
|
|
|
|
done_future_set, pending_future_set = await asyncio.wait(
|
|
|
|
|
[queue.get() for queue in _task_queues],
|
|
|
|
|
return_when=asyncio.FIRST_COMPLETED
|
|
|
|
|
)
|
|
|
|
|
for future in pending_future_set:
|
|
|
|
|
future.cancel()
|
|
|
|
|
|
|
|
|
|
# 如果有多个队列都取到任务了,只返回优先级最高的那一个,剩下的放回队列
|
|
|
|
|
assert len(done_future_set) != 0
|
|
|
|
|
tasks = [await future for future in done_future_set]
|
|
|
|
|
if len(tasks) > 1:
|
|
|
|
|
tasks.sort(key=lambda task_: task_.priority)
|
|
|
|
|
|
|
|
|
|
res = None
|
|
|
|
|
for task in tasks:
|
|
|
|
|
if res is None:
|
|
|
|
|
res = task
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if not _push_task(task):
|
|
|
|
|
task.future.set_result(None)
|
|
|
|
|
return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _cancel_all_tasks_if_no_available_translate_provider():
|
|
|
|
|
if _has_available_translate_provider():
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
logger.warning('No available translate provider')
|
|
|
|
|
for queue in _task_queues:
|
|
|
|
|
while not queue.empty():
|
|
|
|
|
task = queue.get_nowait()
|
|
|
|
|
task.future.set_result(None)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
def _has_available_translate_provider():
|
|
|
|
|
return any(provider.is_available for provider in _translate_providers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TranslateProvider:
|
|
|
|
|
def __init__(self, query_interval):
|
2021-04-11 14:16:32 +08:00
|
|
|
|
self._query_interval = query_interval
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._be_available_event = asyncio.Event()
|
|
|
|
|
self._be_available_event.set()
|
2021-04-11 14:16:32 +08:00
|
|
|
|
|
|
|
|
|
async def init(self):
|
2023-07-29 12:48:57 +08:00
|
|
|
|
asyncio.create_task(self._translate_consumer())
|
2021-04-11 14:16:32 +08:00
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def is_available(self):
|
2023-08-05 21:25:59 +08:00
|
|
|
|
return True
|
2021-04-20 22:09:47 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
def _on_availability_change(self):
|
|
|
|
|
if self.is_available:
|
|
|
|
|
self._be_available_event.set()
|
|
|
|
|
else:
|
|
|
|
|
self._be_available_event.clear()
|
|
|
|
|
_cancel_all_tasks_if_no_available_translate_provider()
|
2021-04-11 14:16:32 +08:00
|
|
|
|
|
|
|
|
|
async def _translate_consumer(self):
|
2023-08-05 21:25:59 +08:00
|
|
|
|
cls_name = type(self).__name__
|
2021-04-11 14:16:32 +08:00
|
|
|
|
while True:
|
|
|
|
|
try:
|
2023-08-05 21:25:59 +08:00
|
|
|
|
if not self.is_available:
|
|
|
|
|
logger.info('%s waiting to become available', cls_name)
|
|
|
|
|
await self._be_available_event.wait()
|
|
|
|
|
logger.info('%s became available', cls_name)
|
|
|
|
|
|
|
|
|
|
task = await _pop_task()
|
|
|
|
|
# 为了简化代码,约定只会在_translate_wrapper里变成不可用,所以获取task之后这里还是可用的
|
|
|
|
|
assert self.is_available
|
|
|
|
|
|
|
|
|
|
start_time = datetime.datetime.now()
|
|
|
|
|
await self._translate_wrapper(task)
|
|
|
|
|
cost_time = (datetime.datetime.now() - start_time).total_seconds()
|
|
|
|
|
|
2021-04-11 14:16:32 +08:00
|
|
|
|
# 频率限制
|
2023-08-05 21:25:59 +08:00
|
|
|
|
await asyncio.sleep(self._query_interval - cost_time)
|
2022-02-15 00:18:46 +08:00
|
|
|
|
except Exception: # noqa
|
2023-08-05 21:25:59 +08:00
|
|
|
|
logger.exception('%s error:', cls_name)
|
2021-04-11 14:16:32 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
async def _translate_wrapper(self, task: TranslateTask) -> Optional[str]:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
try:
|
2023-08-05 21:25:59 +08:00
|
|
|
|
exc = None
|
|
|
|
|
task.remain_retry_count -= 1
|
|
|
|
|
res = await self._do_translate(task.text)
|
2021-04-11 14:16:32 +08:00
|
|
|
|
except BaseException as e:
|
2023-08-05 21:25:59 +08:00
|
|
|
|
exc = e
|
|
|
|
|
res = None
|
|
|
|
|
if res is not None:
|
|
|
|
|
task.future.set_result(res)
|
|
|
|
|
return res
|
|
|
|
|
|
|
|
|
|
if task.remain_retry_count > 0:
|
|
|
|
|
# 还可以重试则放回队列
|
|
|
|
|
if not _push_task(task):
|
|
|
|
|
task.future.set_result(None)
|
2021-04-11 14:16:32 +08:00
|
|
|
|
else:
|
2023-08-05 21:25:59 +08:00
|
|
|
|
# 否则设置异常或None结果
|
|
|
|
|
if exc is not None:
|
|
|
|
|
task.future.set_exception(exc)
|
|
|
|
|
else:
|
|
|
|
|
task.future.set_result(None)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
async def _do_translate(self, text) -> Optional[str]:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
class TencentTranslateFree(TranslateProvider):
|
|
|
|
|
def __init__(self, query_interval, source_language, target_language):
|
|
|
|
|
super().__init__(query_interval)
|
|
|
|
|
self._be_available_event.clear() # _do_init之后才可用
|
2021-04-11 14:16:32 +08:00
|
|
|
|
self._source_language = source_language
|
|
|
|
|
self._target_language = target_language
|
|
|
|
|
|
2021-07-18 14:53:36 +08:00
|
|
|
|
self._server_time_delta = 0
|
|
|
|
|
self._uc_key = self._uc_iv = ''
|
|
|
|
|
self._qtv = self._qtk = ''
|
2020-03-21 18:42:27 +08:00
|
|
|
|
self._reinit_future = None
|
2021-07-18 14:53:36 +08:00
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
# 连续失败的次数
|
|
|
|
|
self._fail_count = 0
|
|
|
|
|
|
|
|
|
|
async def init(self):
|
2021-04-11 14:16:32 +08:00
|
|
|
|
if not await super().init():
|
|
|
|
|
return False
|
2023-07-29 12:48:57 +08:00
|
|
|
|
self._reinit_future = asyncio.create_task(self._reinit_coroutine())
|
2021-04-11 14:16:32 +08:00
|
|
|
|
return True
|
2020-03-21 18:42:27 +08:00
|
|
|
|
|
2020-05-09 21:38:21 +08:00
|
|
|
|
async def _do_init(self):
|
2020-02-06 12:48:20 +08:00
|
|
|
|
try:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
async with utils.request.http_session.get('https://fanyi.qq.com/') as r:
|
2020-11-24 23:17:30 +08:00
|
|
|
|
if r.status != 200:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree init request failed: status=%d %s', r.status, r.reason)
|
2020-11-24 23:17:30 +08:00
|
|
|
|
return False
|
|
|
|
|
html = await r.text()
|
|
|
|
|
|
2021-07-18 14:53:36 +08:00
|
|
|
|
try:
|
|
|
|
|
server_time = r.headers['Date']
|
|
|
|
|
server_time = datetime.datetime.strptime(server_time, '%a, %d %b %Y %H:%M:%S GMT')
|
|
|
|
|
server_time = server_time.replace(tzinfo=datetime.timezone.utc).timestamp()
|
|
|
|
|
self._server_time_delta = int((datetime.datetime.now().timestamp() - server_time) * 1000)
|
|
|
|
|
except (KeyError, ValueError):
|
|
|
|
|
self._server_time_delta = 0
|
2023-07-29 01:22:21 +08:00
|
|
|
|
except (aiohttp.ClientError, asyncio.TimeoutError):
|
2021-07-18 14:53:36 +08:00
|
|
|
|
logger.exception('TencentTranslateFree init error:')
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
# 获取token URL
|
|
|
|
|
m = re.search(r"""\breauthuri\s*=\s*['"](.+?)['"]""", html)
|
|
|
|
|
if m is None:
|
|
|
|
|
logger.exception('TencentTranslateFree init failed: reauthuri not found')
|
|
|
|
|
return False
|
|
|
|
|
reauthuri = m[1]
|
2020-11-24 23:17:30 +08:00
|
|
|
|
|
2021-07-18 14:53:36 +08:00
|
|
|
|
# 获取验证用的key、iv
|
|
|
|
|
m = re.search(r"""\s*=\s*['"]((?:\w+\|\w+-)+\w+\|\w+)['"]""", html)
|
|
|
|
|
if m is None:
|
|
|
|
|
logger.exception('TencentTranslateFree init failed: initial global variables not found')
|
|
|
|
|
return False
|
|
|
|
|
uc_key = None
|
|
|
|
|
uc_iv = None
|
|
|
|
|
for item in m[1].split('-'):
|
|
|
|
|
key, _, value = item.partition('|')
|
|
|
|
|
if key == 'a137':
|
|
|
|
|
uc_key = value
|
|
|
|
|
elif key == 'E74':
|
|
|
|
|
uc_iv = value
|
|
|
|
|
if uc_key is not None and uc_iv is not None:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
# 获取token
|
|
|
|
|
try:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
async with utils.request.http_session.post('https://fanyi.qq.com/api/' + reauthuri) as r:
|
2020-02-06 12:48:20 +08:00
|
|
|
|
if r.status != 200:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree init request failed: reauthuri=%s, status=%d %s',
|
2020-11-28 22:05:44 +08:00
|
|
|
|
reauthuri, r.status, r.reason)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return False
|
2020-11-17 21:56:01 +08:00
|
|
|
|
data = await r.json()
|
2023-07-29 01:22:21 +08:00
|
|
|
|
except (aiohttp.ClientError, asyncio.TimeoutError):
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.exception('TencentTranslateFree init error:')
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return False
|
|
|
|
|
|
2020-11-17 21:56:01 +08:00
|
|
|
|
qtv = data.get('qtv', None)
|
|
|
|
|
if qtv is None:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree init failed: qtv not found')
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return False
|
2020-11-17 21:56:01 +08:00
|
|
|
|
qtk = data.get('qtk', None)
|
|
|
|
|
if qtk is None:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree init failed: qtk not found')
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return False
|
2020-03-21 18:42:27 +08:00
|
|
|
|
|
2021-07-18 14:53:36 +08:00
|
|
|
|
self._uc_key = uc_key
|
|
|
|
|
self._uc_iv = uc_iv
|
2020-03-21 19:13:06 +08:00
|
|
|
|
self._qtv = qtv
|
|
|
|
|
self._qtk = qtk
|
2023-08-05 21:25:59 +08:00
|
|
|
|
|
|
|
|
|
self._on_availability_change()
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return True
|
|
|
|
|
|
2020-03-21 18:42:27 +08:00
|
|
|
|
async def _reinit_coroutine(self):
|
|
|
|
|
try:
|
2020-05-09 21:38:21 +08:00
|
|
|
|
while True:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.debug('TencentTranslateFree reinit')
|
2023-08-05 21:25:59 +08:00
|
|
|
|
start_time = datetime.datetime.now()
|
|
|
|
|
try:
|
|
|
|
|
await self._do_init()
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
raise
|
|
|
|
|
except BaseException: # noqa
|
|
|
|
|
pass
|
|
|
|
|
cost_time = (datetime.datetime.now() - start_time).total_seconds()
|
|
|
|
|
|
|
|
|
|
await asyncio.sleep(30 - cost_time)
|
2020-03-21 18:42:27 +08:00
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
pass
|
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
@property
|
|
|
|
|
def is_available(self):
|
2021-07-18 14:53:36 +08:00
|
|
|
|
return '' not in (self._uc_key, self._uc_iv, self._qtv, self._qtk) and super().is_available
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
async def _translate_wrapper(self, task: TranslateTask) -> Optional[str]:
|
|
|
|
|
res = await super()._translate_wrapper(task)
|
|
|
|
|
if res is not None:
|
2020-02-06 12:48:20 +08:00
|
|
|
|
self._fail_count = 0
|
2023-08-05 21:25:59 +08:00
|
|
|
|
else:
|
|
|
|
|
self._on_fail()
|
|
|
|
|
return res
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
async def _do_translate(self, text) -> Optional[str]:
|
2020-02-06 12:48:20 +08:00
|
|
|
|
try:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
async with utils.request.http_session.post(
|
2020-02-06 12:48:20 +08:00
|
|
|
|
'https://fanyi.qq.com/api/translate',
|
|
|
|
|
headers={
|
2021-07-18 14:53:36 +08:00
|
|
|
|
'Referer': 'https://fanyi.qq.com/',
|
|
|
|
|
'uc': self._get_uc()
|
2020-02-06 12:48:20 +08:00
|
|
|
|
},
|
|
|
|
|
data={
|
2021-04-11 14:16:32 +08:00
|
|
|
|
'source': self._source_language,
|
|
|
|
|
'target': self._target_language,
|
2020-02-06 12:48:20 +08:00
|
|
|
|
'sourceText': text,
|
|
|
|
|
'qtv': self._qtv,
|
|
|
|
|
'qtk': self._qtk
|
|
|
|
|
}
|
|
|
|
|
) as r:
|
|
|
|
|
if r.status != 200:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree request failed: status=%d %s', r.status, r.reason)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return None
|
2021-07-18 14:53:36 +08:00
|
|
|
|
self._update_uc_key(r)
|
2020-02-06 12:48:20 +08:00
|
|
|
|
data = await r.json()
|
2023-07-29 01:22:21 +08:00
|
|
|
|
except (aiohttp.ClientError, asyncio.TimeoutError):
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return None
|
|
|
|
|
if data['errCode'] != 0:
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.warning('TencentTranslateFree failed: %d %s', data['errCode'], data['errMsg'])
|
2020-02-06 12:48:20 +08:00
|
|
|
|
return None
|
2020-03-21 18:42:27 +08:00
|
|
|
|
res = ''.join(record['targetText'] for record in data['translate']['records'])
|
|
|
|
|
if res == '' and text.strip() != '':
|
|
|
|
|
# qtv、qtk过期
|
2023-08-05 21:25:59 +08:00
|
|
|
|
logger.info('TencentTranslateFree result is empty %s', data)
|
2020-03-21 18:42:27 +08:00
|
|
|
|
return None
|
|
|
|
|
return res
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2021-07-18 14:53:36 +08:00
|
|
|
|
def _get_uc(self):
|
|
|
|
|
user_actions = self._gen_user_actions()
|
|
|
|
|
cur_timestamp = str(int(datetime.datetime.now().timestamp() * 1000))
|
|
|
|
|
server_time_delta = str(self._server_time_delta)
|
|
|
|
|
uc = '|'.join([user_actions, cur_timestamp, server_time_delta])
|
|
|
|
|
|
|
|
|
|
aes = cry_aes.new(self._uc_key.encode('utf-8'), cry_aes.MODE_CBC, self._uc_iv.encode('utf-8'))
|
|
|
|
|
uc = cry_pad.pad(uc.encode('utf-8'), aes.block_size, 'pkcs7')
|
|
|
|
|
uc = aes.encrypt(uc)
|
|
|
|
|
uc = base64.b64encode(uc).decode('utf-8')
|
|
|
|
|
return uc
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _gen_user_actions():
|
|
|
|
|
# 1:点击翻译;2:源输入框聚焦或失去焦点;3:点击源语言列表;4:点击交换语言;5:点击目标语言列表;6:源输入框输入、粘贴
|
|
|
|
|
user_actions = []
|
|
|
|
|
if random.randint(1, 5) == 1:
|
|
|
|
|
for i in range(random.randint(1, 2)):
|
|
|
|
|
user_actions.append('2')
|
|
|
|
|
user_actions.append('6')
|
|
|
|
|
for i in range(random.randint(0, 6)):
|
|
|
|
|
user_actions.append(random.choice('26'))
|
|
|
|
|
if random.randint(1, 5) == 1:
|
|
|
|
|
user_actions.append('1')
|
|
|
|
|
return ''.join(user_actions)
|
|
|
|
|
|
|
|
|
|
def _update_uc_key(self, r):
|
|
|
|
|
try:
|
|
|
|
|
hf_f = r.headers['f']
|
|
|
|
|
hf_ts = int(r.headers['ts'])
|
|
|
|
|
except (KeyError, ValueError):
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
cur_timestamp = int(datetime.datetime.now().timestamp() * 1000)
|
|
|
|
|
hf_f = base64.b64decode(hf_f.encode('utf-8')).decode('utf-8')
|
|
|
|
|
pos = int(hf_f[72: 72 + 4])
|
|
|
|
|
uc_key = hf_f[pos: pos + 16]
|
|
|
|
|
uc_iv = hf_f[pos + 16: pos + 16 + 16]
|
|
|
|
|
|
|
|
|
|
self._server_time_delta = cur_timestamp - hf_ts
|
|
|
|
|
self._uc_key = uc_key
|
|
|
|
|
self._uc_iv = uc_iv
|
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
def _on_fail(self):
|
|
|
|
|
self._fail_count += 1
|
2023-07-31 23:29:02 +08:00
|
|
|
|
# 为了可靠性,连续失败5次时冷却直到下次重新init
|
|
|
|
|
if self._fail_count >= 5:
|
2021-03-28 18:30:31 +08:00
|
|
|
|
self._cool_down()
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2021-03-28 18:30:31 +08:00
|
|
|
|
def _cool_down(self):
|
2021-04-11 14:16:32 +08:00
|
|
|
|
logger.info('TencentTranslateFree is cooling down')
|
|
|
|
|
# 下次_do_init后恢复
|
2021-07-18 14:53:36 +08:00
|
|
|
|
self._uc_key = self._uc_iv = ''
|
2020-02-06 12:48:20 +08:00
|
|
|
|
self._qtv = self._qtk = ''
|
2021-03-28 18:30:31 +08:00
|
|
|
|
self._fail_count = 0
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._on_availability_change()
|
|
|
|
|
|
2020-02-06 12:48:20 +08:00
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
class TencentTranslate(TranslateProvider):
|
|
|
|
|
def __init__(self, query_interval, source_language, target_language,
|
2021-04-11 22:11:13 +08:00
|
|
|
|
secret_id, secret_key, region):
|
2023-08-05 21:25:59 +08:00
|
|
|
|
super().__init__(query_interval)
|
2021-04-11 22:11:13 +08:00
|
|
|
|
self._source_language = source_language
|
|
|
|
|
self._target_language = target_language
|
|
|
|
|
self._secret_id = secret_id
|
|
|
|
|
self._secret_key = secret_key
|
|
|
|
|
self._region = region
|
|
|
|
|
|
|
|
|
|
self._cool_down_timer_handle = None
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def is_available(self):
|
|
|
|
|
return self._cool_down_timer_handle is None and super().is_available
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
async def _do_translate(self, text) -> Optional[str]:
|
2021-04-11 22:11:13 +08:00
|
|
|
|
try:
|
|
|
|
|
async with self._request_tencent_cloud(
|
|
|
|
|
'TextTranslate',
|
|
|
|
|
'2018-03-21',
|
|
|
|
|
{
|
|
|
|
|
'SourceText': text,
|
|
|
|
|
'Source': self._source_language,
|
|
|
|
|
'Target': self._target_language,
|
|
|
|
|
'ProjectId': 0
|
|
|
|
|
}
|
|
|
|
|
) as r:
|
|
|
|
|
if r.status != 200:
|
|
|
|
|
logger.warning('TencentTranslate request failed: status=%d %s', r.status, r.reason)
|
|
|
|
|
return None
|
|
|
|
|
data = (await r.json())['Response']
|
2023-07-29 11:52:09 +08:00
|
|
|
|
except (aiohttp.ClientConnectionError, asyncio.TimeoutError):
|
2021-04-11 22:11:13 +08:00
|
|
|
|
return None
|
|
|
|
|
error = data.get('Error', None)
|
|
|
|
|
if error is not None:
|
|
|
|
|
logger.warning('TencentTranslate failed: %s %s, RequestId=%s', error['Code'],
|
|
|
|
|
error['Message'], data['RequestId'])
|
|
|
|
|
self._on_fail(error['Code'])
|
|
|
|
|
return None
|
|
|
|
|
return data['TargetText']
|
|
|
|
|
|
|
|
|
|
def _request_tencent_cloud(self, action, version, body):
|
|
|
|
|
body_bytes = json.dumps(body).encode('utf-8')
|
|
|
|
|
|
|
|
|
|
canonical_headers = 'content-type:application/json; charset=utf-8\nhost:tmt.tencentcloudapi.com\n'
|
|
|
|
|
signed_headers = 'content-type;host'
|
|
|
|
|
hashed_request_payload = hashlib.sha256(body_bytes).hexdigest()
|
|
|
|
|
canonical_request = f'POST\n/\n\n{canonical_headers}\n{signed_headers}\n{hashed_request_payload}'
|
|
|
|
|
|
|
|
|
|
request_timestamp = int(datetime.datetime.now().timestamp())
|
|
|
|
|
date = datetime.datetime.utcfromtimestamp(request_timestamp).strftime('%Y-%m-%d')
|
|
|
|
|
credential_scope = f'{date}/tmt/tc3_request'
|
|
|
|
|
hashed_canonical_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
|
|
|
|
|
string_to_sign = f'TC3-HMAC-SHA256\n{request_timestamp}\n{credential_scope}\n{hashed_canonical_request}'
|
|
|
|
|
|
|
|
|
|
def sign(key, msg):
|
|
|
|
|
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
|
|
|
|
|
|
|
|
|
|
secret_date = sign(('TC3' + self._secret_key).encode('utf-8'), date)
|
|
|
|
|
secret_service = sign(secret_date, 'tmt')
|
|
|
|
|
secret_signing = sign(secret_service, 'tc3_request')
|
|
|
|
|
signature = hmac.new(secret_signing, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
|
|
|
|
|
|
|
|
|
|
authorization = (
|
|
|
|
|
f'TC3-HMAC-SHA256 Credential={self._secret_id}/{credential_scope}, '
|
|
|
|
|
f'SignedHeaders={signed_headers}, Signature={signature}'
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
headers = {
|
|
|
|
|
'Authorization': authorization,
|
|
|
|
|
'Content-Type': 'application/json; charset=utf-8',
|
|
|
|
|
'X-TC-Action': action,
|
|
|
|
|
'X-TC-Version': version,
|
|
|
|
|
'X-TC-Timestamp': str(request_timestamp),
|
|
|
|
|
'X-TC-Region': self._region
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-15 00:18:46 +08:00
|
|
|
|
return utils.request.http_session.post('https://tmt.tencentcloudapi.com/', headers=headers, data=body_bytes)
|
2021-04-11 22:11:13 +08:00
|
|
|
|
|
|
|
|
|
def _on_fail(self, code):
|
|
|
|
|
if self._cool_down_timer_handle is not None:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
sleep_time = 0
|
|
|
|
|
if code == 'FailedOperation.NoFreeAmount':
|
|
|
|
|
# 下个月恢复免费额度
|
|
|
|
|
cur_time = datetime.datetime.now()
|
|
|
|
|
year = cur_time.year
|
|
|
|
|
month = cur_time.month + 1
|
|
|
|
|
if month > 12:
|
|
|
|
|
year += 1
|
|
|
|
|
month = 1
|
|
|
|
|
next_month_time = datetime.datetime(year, month, 1, minute=5)
|
|
|
|
|
sleep_time = (next_month_time - cur_time).total_seconds()
|
|
|
|
|
# Python 3.8之前不能超过一天
|
|
|
|
|
sleep_time = min(sleep_time, 24 * 60 * 60 - 1)
|
|
|
|
|
elif code in ('FailedOperation.ServiceIsolate', 'LimitExceeded'):
|
|
|
|
|
# 需要手动处理,等5分钟
|
|
|
|
|
sleep_time = 5 * 60
|
|
|
|
|
if sleep_time != 0:
|
2023-07-29 12:48:57 +08:00
|
|
|
|
self._cool_down_timer_handle = asyncio.get_running_loop().call_later(
|
2021-04-11 22:11:13 +08:00
|
|
|
|
sleep_time, self._on_cool_down_timeout
|
|
|
|
|
)
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._on_availability_change()
|
2021-04-11 22:11:13 +08:00
|
|
|
|
|
|
|
|
|
def _on_cool_down_timeout(self):
|
|
|
|
|
self._cool_down_timer_handle = None
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._on_availability_change()
|
2021-04-23 21:35:57 +08:00
|
|
|
|
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
class BaiduTranslate(TranslateProvider):
|
|
|
|
|
def __init__(self, query_interval, source_language, target_language,
|
2021-04-23 21:35:57 +08:00
|
|
|
|
app_id, secret):
|
2023-08-05 21:25:59 +08:00
|
|
|
|
super().__init__(query_interval)
|
2021-04-23 21:35:57 +08:00
|
|
|
|
self._source_language = source_language
|
|
|
|
|
self._target_language = target_language
|
|
|
|
|
self._app_id = app_id
|
|
|
|
|
self._secret = secret
|
|
|
|
|
|
|
|
|
|
self._cool_down_timer_handle = None
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def is_available(self):
|
|
|
|
|
return self._cool_down_timer_handle is None and super().is_available
|
|
|
|
|
|
2023-08-05 21:25:59 +08:00
|
|
|
|
async def _do_translate(self, text) -> Optional[str]:
|
2021-04-23 21:35:57 +08:00
|
|
|
|
try:
|
2022-02-15 00:18:46 +08:00
|
|
|
|
async with utils.request.http_session.post(
|
2021-04-23 21:35:57 +08:00
|
|
|
|
'https://fanyi-api.baidu.com/api/trans/vip/translate',
|
|
|
|
|
data=self._add_sign({
|
|
|
|
|
'q': text,
|
|
|
|
|
'from': self._source_language,
|
|
|
|
|
'to': self._target_language,
|
|
|
|
|
'appid': self._app_id,
|
|
|
|
|
'salt': random.randint(1, 999999999)
|
|
|
|
|
})
|
|
|
|
|
) as r:
|
|
|
|
|
if r.status != 200:
|
|
|
|
|
logger.warning('BaiduTranslate request failed: status=%d %s', r.status, r.reason)
|
|
|
|
|
return None
|
|
|
|
|
data = await r.json()
|
2023-07-29 11:52:09 +08:00
|
|
|
|
except (aiohttp.ClientConnectionError, asyncio.TimeoutError):
|
2021-04-23 21:35:57 +08:00
|
|
|
|
return None
|
|
|
|
|
error_code = data.get('error_code', None)
|
|
|
|
|
if error_code is not None:
|
|
|
|
|
logger.warning('BaiduTranslate failed: %s %s', error_code, data['error_msg'])
|
|
|
|
|
self._on_fail(error_code)
|
|
|
|
|
return None
|
|
|
|
|
return ''.join(result['dst'] for result in data['trans_result'])
|
|
|
|
|
|
|
|
|
|
def _add_sign(self, data):
|
|
|
|
|
str_to_sign = f"{self._app_id}{data['q']}{data['salt']}{self._secret}"
|
|
|
|
|
sign = hashlib.md5(str_to_sign.encode('utf-8')).hexdigest()
|
|
|
|
|
return {**data, 'sign': sign}
|
|
|
|
|
|
|
|
|
|
def _on_fail(self, code):
|
|
|
|
|
if self._cool_down_timer_handle is not None:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
sleep_time = 0
|
|
|
|
|
if code == '54004':
|
|
|
|
|
# 账户余额不足,需要手动处理,等5分钟
|
|
|
|
|
sleep_time = 5 * 60
|
|
|
|
|
if sleep_time != 0:
|
2023-07-29 12:48:57 +08:00
|
|
|
|
self._cool_down_timer_handle = asyncio.get_running_loop().call_later(
|
2021-04-23 21:35:57 +08:00
|
|
|
|
sleep_time, self._on_cool_down_timeout
|
|
|
|
|
)
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._on_availability_change()
|
2021-04-23 21:35:57 +08:00
|
|
|
|
|
|
|
|
|
def _on_cool_down_timeout(self):
|
|
|
|
|
self._cool_down_timer_handle = None
|
2023-08-05 21:25:59 +08:00
|
|
|
|
self._on_availability_change()
|