Files
funstat-mcp/core/search_all_translation.py
2025-11-01 21:58:03 +08:00

116 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""完整搜索翻译相关用户并保存到文件"""
import sys
sys.path.insert(0, '.')
from server import FunstatMCPServer
import asyncio
import re
import json
from datetime import datetime
# Patterns used to pull identifiers out of a search reply. Compiled once here
# instead of being re-looked-up for every keyword.
_ID_PATTERN = re.compile(r'`(\d+)`')
_USERNAME_PATTERNS = (re.compile(r'@(\w+)'), re.compile(r't\.me/(\w+)'))


def _extract_entries(reply):
    """Return ``(type, value)`` pairs found in one search reply, in match order.

    Backtick-wrapped digit runs are reported first as ``('id', digits)``,
    followed by ``@name`` mentions and then ``t.me/name`` links, both reported
    as ``('username', name)``. Duplicates are NOT removed here.
    """
    entries = [('id', uid) for uid in _ID_PATTERN.findall(reply)]
    for pattern in _USERNAME_PATTERNS:
        entries.extend(('username', name) for name in pattern.findall(reply))
    return entries


async def main(output_dir='/Users/lucas/chat--1003255561049'):
    """Search for translation-related keywords and save the unique hits.

    For every keyword a ``/search <keyword>`` command is sent through
    ``FunstatMCPServer.send_command_and_wait`` (cache disabled). IDs and
    usernames found in each reply are de-duplicated across all keywords, and
    each kept record remembers the first keyword that produced it. The records
    are written to ``translation_users.txt`` and ``translation_users.json``
    inside ``output_dir``; a summary and the first 100 records are printed.

    Args:
        output_dir: Directory that receives the two report files. It is
            created if missing. Defaults to the location the script has
            always written to.

    A keyword whose search or parsing raises is reported on stdout and
    skipped; the remaining keywords are still processed.
    """
    # Local import keeps this block self-contained (the file header does not
    # import pathlib).
    from pathlib import Path

    out_dir = Path(output_dir)
    txt_file = out_dir / 'translation_users.txt'
    json_file = out_dir / 'translation_users.json'
    # Create the target directory up front so an unusable path fails before
    # the (slow) search runs rather than after it, losing the results.
    out_dir.mkdir(parents=True, exist_ok=True)

    server = FunstatMCPServer()
    await server.initialize()

    results = []
    seen = set()  # (type, value) pairs already recorded
    keywords = [
        '翻译', 'translation', 'translate', 'translator', 'translators',
        '译者', '翻译组', '翻译团队', '字幕组', '汉化', '汉化组',
        'subtitle', 'subtitles', 'fansub', 'scanlation',
        'localization', '本地化', 'interpreting', 'interpretation',
        'translation group', 'subtitle group', 'translation team',
    ]

    print(f"🔍 开始搜索 {len(keywords)} 个关键词")
    print("⚠️ 每个关键词返回最多15条结果funstat BOT限制")
    print("💡 通过多关键词覆盖更多用户")
    print("")

    try:
        for i, kw in enumerate(keywords, 1):
            print(f"[{i:2d}/{len(keywords)}] {kw:25s}", end=' ', flush=True)
            try:
                res = await server.send_command_and_wait(f'/search {kw}', use_cache=False)
                new_count = 0
                for entry in _extract_entries(res):
                    if entry in seen:
                        continue
                    seen.add(entry)
                    kind, value = entry
                    results.append({'type': kind, 'value': value, 'keyword': kw})
                    new_count += 1
                print(f"+{new_count:2d} → 总计: {len(results):3d}")
            except Exception as exc:
                # Best effort: one failing keyword must not abort the whole run.
                print(f"失败: {exc}")
            # Pause after every request, including failed ones, so a failing
            # search is not immediately followed by the next one.
            await asyncio.sleep(0.5)
    finally:
        # Always release the client connection, even if the loop is
        # interrupted; nothing below needs the connection.
        await server.client.disconnect()

    # Save the report files. One timestamp is shared by both so they agree.
    timestamp = datetime.now()
    with open(txt_file, 'w', encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write("翻译相关用户/群组完整列表\n")
        f.write("=" * 80 + "\n")
        f.write(f"总数: {len(results)}\n")
        f.write(f"搜索时间: {timestamp}\n")
        f.write("数据来源: funstat BOT (@openaiw_bot)\n")
        f.write("=" * 80 + "\n\n")
        for i, item in enumerate(results, 1):
            if item['type'] == 'id':
                f.write(f"{i:4d}. ID: {item['value']:15s} (来源: {item['keyword']})\n")
            else:
                f.write(f"{i:4d}. @{item['value']:30s} (来源: {item['keyword']})\n")

    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total': len(results),
            'timestamp': str(timestamp),
            'results': results,
        }, f, ensure_ascii=False, indent=2)

    print("")
    print("=" * 80)
    print(f"✅ 搜索完成!共找到 {len(results)} 条独特记录")
    print("=" * 80)
    print(f"📄 文本文件: {txt_file}")
    print(f"📄 JSON文件: {json_file}")
    print("")

    # Show the first 100 records.
    print("📋 前 100 条结果:")
    print("")
    for i, item in enumerate(results[:100], 1):
        if item['type'] == 'id':
            print(f"{i:3d}. ID: {item['value']}")
        else:
            print(f"{i:3d}. @{item['value']}")
    if len(results) > 100:
        print(f"\n... 还有 {len(results) - 100} 条记录,请查看文件")

    print(f"\n🎯 最终统计: {len(results)} 条独特记录")
    print(f"📊 ID数量: {sum(1 for r in results if r['type'] == 'id')}")
    print(f"👤 用户名数量: {sum(1 for r in results if r['type'] == 'username')}")
# Script entry point: run the async main() only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    asyncio.run(main())