init: Search Hub - 统一多搜索引擎聚合服务
This commit is contained in:
0
hub/__init__.py
Normal file
0
hub/__init__.py
Normal file
82
hub/config_manager.py
Normal file
82
hub/config_manager.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Search Hub 配置管理器 — 读写本地配置文件"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
|
||||
# Location of the hub's YAML configuration file. The SEARCH_HUB_CONFIG
# environment variable, when set to a non-empty value, overrides the built-in
# default so the service can run outside the original /root deployment.
CONFIG_PATH = (
    os.environ.get('SEARCH_HUB_CONFIG')
    or '/root/agentspace/projects/search-hub/config.yaml'
)
|
||||
|
||||
|
||||
def load_hub_config() -> dict:
    """Load the hub configuration from ``CONFIG_PATH``.

    Returns:
        The parsed YAML mapping. An empty dict is returned when the file does
        not exist, is empty, or its top level is not a mapping.

    Raises:
        yaml.YAMLError: If the file exists but is not valid YAML.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the platform locale,
        # so non-ASCII values decode the same way on every host.
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing file simply means "nothing configured yet".
        return {}

    # safe_load returns None for an empty file and may return a list or a
    # scalar for unexpected content; callers rely on getting a dict.
    return data if isinstance(data, dict) else {}
|
||||
|
||||
|
||||
def save_hub_config(cfg: dict):
    """Write the configuration mapping to ``CONFIG_PATH`` as YAML.

    The parent directory is created when it does not exist yet.

    Args:
        cfg: Configuration mapping to persist.
    """
    os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
    # allow_unicode=True emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; a non-UTF-8 locale default would otherwise
    # raise UnicodeEncodeError or write bytes that cannot be read back.
    with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
        yaml.dump(cfg, f, default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
def get_source_config(name: str) -> dict:
    """Return the stored configuration of a single search source.

    Args:
        name: Source identifier used as the key under ``sources``.

    Returns:
        The source's configuration mapping, or an empty dict when the source
        (or the whole ``sources`` section) is absent or set to null.
    """
    cfg = load_hub_config()
    # A YAML key with no value (``sources:``) loads as None, which a plain
    # ``.get('sources', {})`` default would not replace.
    sources = cfg.get('sources') or {}
    return sources.get(name) or {}
|
||||
|
||||
|
||||
def update_source_config(name: str, updates: dict):
    """Merge ``updates`` into one source's configuration and save the file.

    Args:
        name: Source identifier used as the key under ``sources``.
        updates: Key/value pairs to set on that source; existing keys that
            are not mentioned are left untouched.
    """
    cfg = load_hub_config()

    # Treat a missing or null ``sources`` section as an empty mapping.
    sources = cfg.get('sources')
    if sources is None:
        sources = cfg['sources'] = {}

    # Likewise for a source entry that is missing or null.
    entry = sources.get(name)
    if entry is None:
        entry = sources[name] = {}

    entry.update(updates)
    save_hub_config(cfg)
|
||||
|
||||
|
||||
def get_source_schema(name: str) -> list:
    """Return the list of configurable field definitions for a source.

    Each field is a dict with ``key``, ``label``, ``type`` and ``required``,
    plus ``default`` where one is defined. Sources without their own fields,
    and unknown source names, yield an empty list.
    """

    def field(key, label, kind, required, **extra):
        # Build one field definition; a fresh dict is created on every call.
        return {'key': key, 'label': label, 'type': kind,
                'required': required, **extra}

    def api_key():
        return field('api_key', 'API Key', 'password', True)

    table = {}
    table['tavily'] = [
        api_key(),
        field('base_url', 'API 地址', 'text', False,
              default='https://api.tavily.com'),
    ]
    table['ai'] = [
        api_key(),
        field('base_url', 'API 地址', 'text', False,
              default='https://opencode.ai/zen/go/v1'),
        field('model', '模型名称', 'text', False,
              default='deepseek-v4-flash'),
    ]
    table['baidu'] = [
        api_key(),
        field('intelligent_url', '智能检索接口', 'text', False,
              default='https://qianfan.baidubce.com/v2/ai_search/chat/completions'),
        field('web_search_url', '网页搜索接口', 'text', False,
              default='https://qianfan.baidubce.com/v2/ai_search/web_search'),
        field('vdb_access_key', 'VDB Access Key', 'text', False),
        field('vdb_secret_key', 'VDB Secret Key', 'password', False),
    ]
    table['searxng'] = [
        field('base_url', 'SearXNG 地址', 'text', False,
              default='http://localhost:8888'),
    ]
    # Shares the baidu configuration, so it has no fields of its own.
    table['baidu-intelligent'] = []
    table['duckduckgo'] = []

    try:
        return table[name]
    except KeyError:
        return []
|
||||
|
||||
|
||||
def mask_key(key: str) -> str:
    """Return a masked form of a secret that is safe to display.

    Args:
        key: The secret to mask. Non-string values (for example a numeric
            key that YAML parsed as an int) are converted with ``str``.

    Returns:
        ``''`` for an empty or missing key; ``'****'`` for keys of at most
        4 characters; the first 2 characters plus ``'****'`` for keys shorter
        than 16 characters; otherwise the first 6 and last 4 characters
        around ``'****'``.
    """
    if not key:
        return ''
    key = str(key)

    if len(key) <= 4:
        # Showing even two characters would reveal most of such a key.
        return '****'
    if len(key) < 16:
        # The prefix+suffix form exposes 10 characters, which is the whole
        # key at length 9-10 and nearly all of it just above that, so
        # mid-length keys only get the short prefix.
        return key[:2] + '****'
    return key[:6] + '****' + key[-4:]
|
||||
118
hub/quota.py
Normal file
118
hub/quota.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""用量查询 — Tavily 剩余额度 + 百度 VDB 免费额度"""
|
||||
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
import datetime
|
||||
import requests
|
||||
|
||||
|
||||
def check_tavily_usage(api_key: str) -> dict:
    """Query the Tavily usage endpoint and summarise the quota for a key.

    Sends ``GET https://api.tavily.com/usage`` with the key as a bearer token
    and a 10-second timeout.

    Args:
        api_key: Tavily API key.

    Returns:
        On success ``{'available': True, 'key': {...}, 'account': {...}}``,
        where ``key`` holds ``usage``, ``limit`` and ``remaining`` and
        ``account`` holds ``plan_limit``, ``search_usage`` and
        ``paygo_usage``; missing or null numbers are reported as 0.
        On any failure ``{'error': <message>, 'available': False}``.
    """
    if not api_key:
        return {'error': '未配置 API Key', 'available': False}

    try:
        resp = requests.get(
            'https://api.tavily.com/usage',
            headers={'Authorization': f'Bearer {api_key}'},
            timeout=10,
        )
        if resp.status_code == 429:
            return {'error': '查询过于频繁(10分钟内限10次)', 'available': False}
        if resp.status_code != 200:
            return {'error': f'API 返回 {resp.status_code}', 'available': False}

        try:
            data = resp.json()
        except ValueError:
            # Depending on the requests version, a non-JSON body raises a
            # plain ValueError subclass that the handlers below would miss.
            return {'error': 'API 返回了无效的 JSON', 'available': False}
    except requests.exceptions.Timeout:
        return {'error': '请求超时', 'available': False}
    except requests.exceptions.RequestException as e:
        return {'error': f'网络错误: {e}', 'available': False}

    if not isinstance(data, dict):
        return {'error': 'API 返回了无效的 JSON', 'available': False}

    # ``or {}`` also covers keys that are present but null.
    key_data = data.get('key') or {}
    acct = data.get('account') or {}

    usage = key_data.get('usage', 0) or 0
    limit = key_data.get('limit', 0) or 0
    # Without a limit there is nothing to subtract from; report 0 remaining.
    remaining = max(limit - usage, 0) if limit else 0

    return {
        'available': True,
        'key': {'usage': usage, 'limit': limit, 'remaining': remaining},
        'account': {
            'plan_limit': acct.get('plan_limit', 0) or 0,
            'search_usage': acct.get('search_usage', 0) or 0,
            'paygo_usage': acct.get('paygo_usage', 0) or 0,
        },
    }
|
||||
|
||||
|
||||
def _bce_sign(access_key: str, secret_key: str, method: str, path: str,
|
||||
headers: dict, params: dict = None) -> str:
|
||||
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||
expiration = 1800
|
||||
|
||||
auth_string = f'bce-auth-v1/{access_key}/{timestamp}/{expiration}'
|
||||
signing_key = hmac.new(
|
||||
secret_key.encode('utf-8'),
|
||||
auth_string.encode('utf-8'),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
|
||||
signed_headers = sorted(headers.keys(), key=lambda k: k.lower())
|
||||
canonical_headers = ''.join(
|
||||
f'{k.lower()}:{headers[k].strip()}\n' for k in signed_headers
|
||||
)
|
||||
canonical_uri = path
|
||||
|
||||
if params:
|
||||
canonical_query = '&'.join(
|
||||
f'{k}={v}' for k, v in sorted(params.items())
|
||||
)
|
||||
else:
|
||||
canonical_query = ''
|
||||
|
||||
canonical_request = f'{method}\n{canonical_uri}\n{canonical_query}\n{canonical_headers}'
|
||||
|
||||
signature = hmac.new(
|
||||
signing_key.encode('utf-8'),
|
||||
canonical_request.encode('utf-8'),
|
||||
hashlib.sha256,
|
||||
).hexdigest()
|
||||
|
||||
return (f'bce-auth-v1/{access_key}/{timestamp}/{expiration}/'
|
||||
f'{";".join(k.lower() for k in signed_headers)}/{signature}')
|
||||
|
||||
|
||||
def check_baidu_vdb_quota(access_key: str, secret_key: str) -> dict:
    """Query the Baidu VDB free-quota endpoint.

    Sends a signed ``GET https://vdb.bj.baidubce.com/v1/vdb/instance/freeQuota``
    with a 10-second timeout. Only the ``host`` header is part of the
    signature.

    Args:
        access_key: VDB access key.
        secret_key: VDB secret key.

    Returns:
        On success ``{'available': True, 'freeQuota': <value>}``, where the
        value is the response's ``freeQuota`` field (0 when absent).
        On any failure ``{'error': <message>, 'available': False}``.
    """
    if not access_key or not secret_key:
        return {'error': '未配置 VDB Access Key / Secret Key', 'available': False}

    host = 'vdb.bj.baidubce.com'
    path = '/v1/vdb/instance/freeQuota'
    method = 'GET'

    # Sign before adding the remaining headers so that only ``host`` is
    # covered by the signature.
    headers = {'host': host}
    headers['Authorization'] = _bce_sign(access_key, secret_key, method, path, headers)
    headers['Content-Type'] = 'application/json'

    try:
        resp = requests.get(
            f'https://{host}{path}',
            headers=headers,
            timeout=10,
        )
        if resp.status_code != 200:
            return {'error': f'API 返回 {resp.status_code}', 'available': False}

        try:
            data = resp.json()
        except ValueError:
            # Depending on the requests version, a non-JSON body raises a
            # plain ValueError subclass that the handlers below would miss.
            return {'error': 'API 返回了无效的 JSON', 'available': False}
    except requests.exceptions.Timeout:
        return {'error': '请求超时', 'available': False}
    except requests.exceptions.RequestException as e:
        return {'error': f'网络错误: {e}', 'available': False}

    if not isinstance(data, dict):
        return {'error': 'API 返回了无效的 JSON', 'available': False}

    return {
        'available': True,
        'freeQuota': data.get('freeQuota', 0),
    }
|
||||
112
hub/router.py
Normal file
112
hub/router.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""搜索路由器 — 多源路由 + 结果去重合并"""
|
||||
|
||||
import logging
import time

from providers.base import SearchResult
|
||||
|
||||
|
||||
class SearchRouter:
    """Route a query to one or more search providers and merge the results.

    Providers and results are used duck-typed. Going by the calls made in
    this class, a provider exposes ``is_available()``,
    ``search(query, max_results)``, ``get_status()`` and the attributes
    ``enabled`` and ``priority``; a result exposes ``url``, ``title``,
    ``score``, ``source`` and ``to_dict()``.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, providers: dict):
        """Store the provider registry.

        Args:
            providers: Mapping of ``{name: provider_instance}``.
        """
        self.providers = providers

    def search(self, query: str, source='auto', max_results=10):
        """Run a search and return a serialisable response dict.

        Args:
            query: Search text passed to the provider(s).
            source: ``'auto'`` (or empty) tries providers by ascending
                ``priority`` until one returns results; a single name such
                as ``'tavily'`` uses exactly that provider; a comma-separated
                list such as ``'tavily,baidu'`` merges several providers.
                Surrounding whitespace is ignored.
            max_results: Maximum number of results requested.

        Returns:
            A dict with ``query``, ``results`` (list of result dicts),
            ``total``, ``source`` and ``elapsed`` (seconds, 2 decimals).
            When a single named provider is missing or unavailable the dict
            also carries ``error`` and has no results.

        Exceptions raised by a single named provider propagate to the
        caller; in auto and multi-source mode a failing provider is logged
        and skipped.
        """
        start = time.time()
        # Trim so that ' tavily ' resolves like the names in a
        # comma-separated list, which are trimmed individually.
        source = (source or '').strip()

        if not source or source == 'auto':
            results = self._auto_search(query, max_results)
            used_source = results[0].source if results else None
        elif ',' in source:
            sources = [s.strip() for s in source.split(',') if s.strip()]
            results = self._multi_search(query, sources, max_results)
            used_source = source
        else:
            provider = self.providers.get(source)
            if not provider or not provider.is_available():
                return {
                    'query': query,
                    'results': [],
                    'total': 0,
                    'source': source,
                    'elapsed': round(time.time() - start, 2),
                    'error': f'搜索源 "{source}" 不可用',
                }
            # Tolerate a provider that returns None instead of a list.
            results = provider.search(query, max_results) or []
            used_source = source

        elapsed = round(time.time() - start, 2)
        return {
            'query': query,
            'results': [r.to_dict() for r in results],
            'total': len(results),
            'source': used_source,
            'elapsed': elapsed,
        }

    def _auto_search(self, query, max_results):
        """Try usable providers in ascending ``priority`` order.

        Returns the de-duplicated results of the first provider that yields
        any; a provider that raises or returns nothing is skipped. Returns an
        empty list when no provider produces results.
        """
        candidates = sorted(
            (
                (name, p) for name, p in self.providers.items()
                if p.is_available() and p.enabled
            ),
            key=lambda item: item[1].priority,
        )

        for name, provider in candidates:
            try:
                results = provider.search(query, max_results)
            except Exception as exc:
                # Best-effort fallback: record the failure, try the next one.
                self._log.warning('search provider %r failed: %s', name, exc)
                continue
            if results:
                return self._dedup(results, max_results)

        return []

    def _multi_search(self, query, sources, max_results):
        """Query several named providers one after another and merge them.

        Unknown or unavailable names are skipped. The result budget is split
        evenly across the providers that are actually usable, with a floor of
        3 results per provider; the merged list is then de-duplicated and cut
        to ``max_results``.
        """
        usable = []
        for name in sources:
            provider = self.providers.get(name)
            if provider and provider.is_available():
                usable.append((name, provider))
        if not usable:
            return []

        # Computed once, and based on the usable providers so that skipped
        # names do not shrink everyone else's share.
        per_source = max(max_results // len(usable), 3)

        merged = []
        for name, provider in usable:
            try:
                merged.extend(provider.search(query, per_source) or [])
            except Exception as exc:
                # One failing source must not discard the others' results.
                self._log.warning('search provider %r failed: %s', name, exc)

        return self._dedup(merged, max_results)

    def get_sources(self):
        """Return every provider's ``get_status()`` in ascending priority."""
        ordered = sorted(self.providers.values(), key=lambda p: p.priority)
        return [p.get_status() for p in ordered]

    @staticmethod
    def _dedup(results, max_results):
        """Drop duplicates, sort by score (highest first) and truncate.

        Two results are duplicates when they share a URL, or a title when the
        URL is empty; the first occurrence wins. Results with neither URL nor
        title are dropped. A missing (``None``) score sorts as 0.
        """
        seen = set()
        unique = []
        for r in results:
            key = r.url or r.title
            if key and key not in seen:
                seen.add(key)
                unique.append(r)

        # ``or 0`` keeps the sort from failing when a result has no score.
        unique.sort(key=lambda r: r.score or 0, reverse=True)
        return unique[:max_results]
|
||||
Reference in New Issue
Block a user