init: Search Hub - 统一多搜索引擎聚合服务

This commit is contained in:
2026-05-09 18:46:05 +08:00
commit 81d726179c
27 changed files with 3179 additions and 0 deletions

0
hub/__init__.py Normal file
View File

82
hub/config_manager.py Normal file
View File

@@ -0,0 +1,82 @@
"""Search Hub 配置管理器 — 读写本地配置文件"""
import os
import yaml
# Location of the hub's YAML config file. The SEARCH_HUB_CONFIG environment
# variable, when set to a non-empty value, overrides the built-in default so
# the service can run outside the original deployment directory.
CONFIG_PATH = (
    os.environ.get('SEARCH_HUB_CONFIG')
    or '/root/agentspace/projects/search-hub/config.yaml'
)
def load_hub_config() -> dict:
    """Load the hub configuration from the local YAML file.

    Returns:
        The parsed configuration, or an empty dict when the file does not
        exist or parses to an empty document.
    """
    try:
        # Decode explicitly as UTF-8: the file is written with
        # allow_unicode=True, so it can contain raw non-ASCII text that a
        # locale-dependent default encoding may fail to decode.
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f) or {}
    except (FileNotFoundError, NotADirectoryError):
        # A missing file is the normal "nothing configured yet" state.
        return {}
def save_hub_config(cfg: dict):
    """Write the configuration mapping to the local YAML file.

    The parent directory is created when it does not exist yet.

    Args:
        cfg: The complete configuration to persist; it replaces the file's
            previous content.
    """
    parent = os.path.dirname(CONFIG_PATH)
    if parent:
        # os.makedirs('') raises, so skip it for a bare file name.
        os.makedirs(parent, exist_ok=True)
    # allow_unicode=True emits raw non-ASCII characters, so the file must be
    # written as UTF-8 regardless of the process locale.
    with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
        yaml.dump(cfg, f, default_flow_style=False, allow_unicode=True)
def get_source_config(name: str) -> dict:
    """Return the stored configuration of a single search source.

    Args:
        name: Source identifier used as the key under ``sources``.

    Returns:
        The source's configuration mapping, or an empty dict when the source
        (or the whole ``sources`` section) is missing or null.
    """
    # A YAML key with no value (``sources:``) loads as None rather than as a
    # missing key, so ``or {}`` is used instead of a .get() default.
    sources = load_hub_config().get('sources') or {}
    return sources.get(name) or {}
def update_source_config(name: str, updates: dict):
    """Merge ``updates`` into one source's configuration and save the file.

    Args:
        name: Source identifier used as the key under ``sources``.
        updates: Key/value pairs to set; existing keys not mentioned here are
            left untouched.
    """
    cfg = load_hub_config()
    # Treat a null section (``sources:`` with no value in the YAML) the same
    # as a missing one; the plain ``in`` test would let None through.
    if cfg.get('sources') is None:
        cfg['sources'] = {}
    sources = cfg['sources']
    if sources.get(name) is None:
        sources[name] = {}
    sources[name].update(updates)
    save_hub_config(cfg)
def get_source_schema(name: str) -> list:
    """Return the configurable field definitions for a search source.

    Args:
        name: Source identifier, e.g. ``'tavily'`` or ``'baidu'``.

    Returns:
        A list of field descriptors (``key``, ``label``, ``type``,
        ``required`` and, where one exists, ``default``). Unknown sources and
        sources without their own fields yield an empty list.
    """
    def field(key, label, kind, required=False, default=None):
        # Build one descriptor; 'default' is only present when a value exists.
        spec = {'key': key, 'label': label, 'type': kind, 'required': required}
        if default is not None:
            spec['default'] = default
        return spec

    def api_key_field():
        return field('api_key', 'API Key', 'password', required=True)

    catalogue = {
        'tavily': [
            api_key_field(),
            field('base_url', 'API 地址', 'text',
                  default='https://api.tavily.com'),
        ],
        'ai': [
            api_key_field(),
            field('base_url', 'API 地址', 'text',
                  default='https://opencode.ai/zen/go/v1'),
            field('model', '模型名称', 'text',
                  default='deepseek-v4-flash'),
        ],
        'baidu': [
            api_key_field(),
            field('intelligent_url', '智能检索接口', 'text',
                  default='https://qianfan.baidubce.com/v2/ai_search/chat/completions'),
            field('web_search_url', '网页搜索接口', 'text',
                  default='https://qianfan.baidubce.com/v2/ai_search/web_search'),
            field('vdb_access_key', 'VDB Access Key', 'text'),
            field('vdb_secret_key', 'VDB Secret Key', 'password'),
        ],
        'searxng': [
            field('base_url', 'SearXNG 地址', 'text',
                  default='http://localhost:8888'),
        ],
        # Shares the 'baidu' settings, so it has no fields of its own.
        'baidu-intelligent': [],
        'duckduckgo': [],
    }
    return catalogue.get(name, [])
def mask_key(key: str) -> str:
    """Return a masked form of a secret that is safe to display.

    Args:
        key: The secret to mask. Falsy values yield ``''``; non-string
            scalars (e.g. a numeric key parsed from YAML) are converted to
            text first.

    Returns:
        ``'****'`` for secrets of up to 4 characters, the first 2 characters
        plus ``'****'`` for secrets of up to 12 characters, and otherwise the
        first 6 and last 4 characters around ``'****'``.
    """
    if not key:
        return ''
    key = str(key)
    if len(key) <= 4:
        # Showing even two characters would reveal half or all of the secret.
        return '****'
    if len(key) <= 12:
        # The long form exposes 10 characters; anything this short would be
        # entirely (or almost entirely) disclosed by it.
        return key[:2] + '****'
    return key[:6] + '****' + key[-4:]

118
hub/quota.py Normal file
View File

@@ -0,0 +1,118 @@
"""用量查询 — Tavily 剩余额度 + 百度 VDB 免费额度"""
import time
import hmac
import hashlib
import datetime
import requests
def check_tavily_usage(api_key: str) -> dict:
    """Query the Tavily usage endpoint and summarise the key's quota.

    Args:
        api_key: Tavily API key sent as a Bearer token.

    Returns:
        On success ``{'available': True, 'key': {...}, 'account': {...}}``
        where ``key`` holds ``usage``, ``limit`` and ``remaining``, and
        ``account`` holds ``plan_limit``, ``search_usage`` and
        ``paygo_usage``. On any failure ``{'error': <message>,
        'available': False}``; this function does not raise for network or
        parsing problems.
    """
    if not api_key:
        return {'error': '未配置 API Key', 'available': False}
    try:
        resp = requests.get(
            'https://api.tavily.com/usage',
            headers={'Authorization': f'Bearer {api_key}'},
            timeout=10,
        )
    except requests.exceptions.Timeout:
        return {'error': '请求超时', 'available': False}
    except requests.exceptions.RequestException as e:
        return {'error': f'网络错误: {e}', 'available': False}

    if resp.status_code == 429:
        return {'error': '查询过于频繁10分钟内限10次', 'available': False}
    if resp.status_code != 200:
        return {'error': f'API 返回 {resp.status_code}', 'available': False}

    try:
        data = resp.json()
    except ValueError:
        # Every JSON decode error requests can raise subclasses ValueError;
        # report it as a bad response instead of a network error.
        return {'error': '响应不是合法的 JSON', 'available': False}
    if not isinstance(data, dict):
        return {'error': '响应格式异常', 'available': False}

    # ``or {}`` / ``or 0`` also cover fields that are present but null.
    key_data = data.get('key') or {}
    acct = data.get('account') or {}
    usage = key_data.get('usage', 0) or 0
    limit = key_data.get('limit', 0) or 0
    # Without a limit there is nothing to subtract from, so report 0.
    remaining = max(limit - usage, 0) if limit else 0
    return {
        'available': True,
        'key': {'usage': usage, 'limit': limit, 'remaining': remaining},
        'account': {
            'plan_limit': acct.get('plan_limit', 0) or 0,
            'search_usage': acct.get('search_usage', 0) or 0,
            'paygo_usage': acct.get('paygo_usage', 0) or 0,
        },
    }
def _bce_sign(access_key: str, secret_key: str, method: str, path: str,
headers: dict, params: dict = None) -> str:
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
expiration = 1800
auth_string = f'bce-auth-v1/{access_key}/{timestamp}/{expiration}'
signing_key = hmac.new(
secret_key.encode('utf-8'),
auth_string.encode('utf-8'),
hashlib.sha256,
).hexdigest()
signed_headers = sorted(headers.keys(), key=lambda k: k.lower())
canonical_headers = ''.join(
f'{k.lower()}:{headers[k].strip()}\n' for k in signed_headers
)
canonical_uri = path
if params:
canonical_query = '&'.join(
f'{k}={v}' for k, v in sorted(params.items())
)
else:
canonical_query = ''
canonical_request = f'{method}\n{canonical_uri}\n{canonical_query}\n{canonical_headers}'
signature = hmac.new(
signing_key.encode('utf-8'),
canonical_request.encode('utf-8'),
hashlib.sha256,
).hexdigest()
return (f'bce-auth-v1/{access_key}/{timestamp}/{expiration}/'
f'{";".join(k.lower() for k in signed_headers)}/{signature}')
def check_baidu_vdb_quota(access_key: str, secret_key: str) -> dict:
    """Query the Baidu VDB free-quota endpoint with a BCE-signed request.

    Args:
        access_key: VDB access key.
        secret_key: VDB secret key.

    Returns:
        ``{'available': True, 'freeQuota': <value>}`` where the value is the
        response's ``freeQuota`` field (0 when absent), or
        ``{'error': <message>, 'available': False}`` on any failure. Network
        and parsing problems are reported through the dict, not raised.
    """
    if not access_key or not secret_key:
        return {'error': '未配置 VDB Access Key / Secret Key', 'available': False}

    host = 'vdb.bj.baidubce.com'
    path = '/v1/vdb/instance/freeQuota'
    method = 'GET'
    headers = {
        'host': host,
    }
    # Only 'host' is signed; the headers added afterwards are deliberately
    # left out of the signature.
    auth = _bce_sign(access_key, secret_key, method, path, headers)
    headers['Authorization'] = auth
    headers['Content-Type'] = 'application/json'

    try:
        resp = requests.get(
            f'https://{host}{path}',
            headers=headers,
            timeout=10,
        )
    except requests.exceptions.Timeout:
        return {'error': '请求超时', 'available': False}
    except requests.exceptions.RequestException as e:
        return {'error': f'网络错误: {e}', 'available': False}

    if resp.status_code != 200:
        return {'error': f'API 返回 {resp.status_code}', 'available': False}

    try:
        data = resp.json()
    except ValueError:
        # JSON decode errors subclass ValueError; report them as a bad
        # response rather than as a network error.
        return {'error': '响应不是合法的 JSON', 'available': False}
    if not isinstance(data, dict):
        return {'error': '响应格式异常', 'available': False}

    return {
        'available': True,
        'freeQuota': data.get('freeQuota', 0),
    }

112
hub/router.py Normal file
View File

@@ -0,0 +1,112 @@
"""搜索路由器 — 多源路由 + 结果去重合并"""
import logging
import time

from providers.base import SearchResult
class SearchRouter:
    """Route a query to one or more search providers and merge the results."""

    def __init__(self, providers: dict):
        """Store the provider registry.

        Args:
            providers: Mapping of source name to provider instance. This class
                uses each provider's ``is_available()``, ``search(query, n)``,
                ``get_status()``, ``priority`` and ``enabled``.
        """
        self.providers = providers

    def search(self, query: str, source='auto', max_results=10):
        """Run a search and return a response dict.

        Args:
            query: The search text.
            source: ``'auto'`` (or empty) to try enabled providers in priority
                order, a single source name, or a comma-separated list of
                names whose results are merged.
            max_results: Maximum number of results for the auto and
                multi-source modes; passed through to the provider in
                single-source mode.

        Returns:
            A dict with ``query``, ``results`` (list of result dicts),
            ``total``, ``source`` and ``elapsed`` (seconds, 2 decimals). When
            a single named source is unavailable or fails, ``results`` is
            empty and an ``error`` message is included.
        """
        start = time.time()
        if not source or source == 'auto':
            results = self._auto_search(query, max_results)
            # Report which provider actually produced the results.
            used_source = results[0].source if results else None
        elif ',' in source:
            sources = [s.strip() for s in source.split(',') if s.strip()]
            results = self._multi_search(query, sources, max_results)
            used_source = source
        else:
            provider = self.providers.get(source)
            if not provider or not provider.is_available():
                return self._error_response(
                    query, source, start, f'搜索源 "{source}" 不可用')
            try:
                # A provider returning None is treated as "no results".
                results = provider.search(query, max_results) or []
            except Exception as exc:
                # Provider boundary: report the failure in the response, as
                # the other modes also never let provider errors escape.
                logging.getLogger(__name__).warning(
                    'search source %r failed: %s', source, exc)
                return self._error_response(
                    query, source, start, f'搜索源 "{source}" 请求失败: {exc}')
            used_source = source
        elapsed = round(time.time() - start, 2)
        return {
            'query': query,
            'results': [r.to_dict() for r in results],
            'total': len(results),
            'source': used_source,
            'elapsed': elapsed,
        }

    @staticmethod
    def _error_response(query, source, start, message):
        """Build the empty-result response used when a named source fails."""
        return {
            'query': query,
            'results': [],
            'total': 0,
            'source': source,
            'elapsed': round(time.time() - start, 2),
            'error': message,
        }

    def _auto_search(self, query, max_results):
        """Try available, enabled providers by ascending priority.

        Returns the de-duplicated results of the first provider that yields
        any; providers that raise or return nothing are skipped. Returns an
        empty list when none succeeds.
        """
        candidates = sorted(
            [p for p in self.providers.values() if p.is_available() and p.enabled],
            key=lambda p: p.priority,
        )
        for provider in candidates:
            try:
                results = provider.search(query, max_results)
            except Exception as exc:
                # Fall through to the next provider, but leave a trace.
                logging.getLogger(__name__).warning(
                    'search provider %r failed, trying next: %s', provider, exc)
                continue
            if results:
                return self._dedup(results, max_results)
        return []

    def _multi_search(self, query, sources, max_results):
        """Query each named source in turn and merge the results.

        Sources are queried sequentially. Unknown or unavailable sources and
        sources that raise are skipped. Each source is asked for an equal
        share of ``max_results``, but never fewer than 3.
        """
        if not sources:
            return []
        per_source = max(max_results // len(sources), 3)
        merged = []
        for name in sources:
            provider = self.providers.get(name)
            if not provider or not provider.is_available():
                continue
            try:
                merged.extend(provider.search(query, per_source) or [])
            except Exception as exc:
                logging.getLogger(__name__).warning(
                    'search source %r failed, skipping: %s', name, exc)
                continue
        return self._dedup(merged, max_results)

    def get_sources(self):
        """Return every provider's ``get_status()`` in ascending priority order."""
        return [
            p.get_status()
            for p in sorted(self.providers.values(), key=lambda p: p.priority)
        ]

    @staticmethod
    def _dedup(results, max_results):
        """Drop duplicates, sort by score descending, and truncate.

        A result is keyed by its URL, falling back to its title; results with
        neither are dropped. The first occurrence of a key wins.
        """
        seen = set()
        unique = []
        for r in results:
            key = r.url or r.title
            if key and key not in seen:
                seen.add(key)
                unique.append(r)
        # A missing (None) score sorts as 0 rather than raising a TypeError
        # when compared with numeric scores.
        unique.sort(key=lambda r: r.score or 0, reverse=True)
        return unique[:max_results]