chore: initial commit

This commit is contained in:
你的用户名
2025-11-01 21:58:03 +08:00
commit a05a7dd40e
65 changed files with 16590 additions and 0 deletions

227
scripts/funstat_auto_query.py Executable file
View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""
007翻译客户自动获取脚本
通过 Funstat BOT 自动查询高质量客户
"""
import asyncio
import json
import os
import re
from datetime import datetime

from telethon import TelegramClient
from telethon.tl.types import Message
# Configuration.
# Each setting can be overridden through an environment variable; the
# literal fallbacks keep the script working exactly as before when the
# variable is unset.
# NOTE(review): the fallback API_ID / API_HASH are real-looking credentials
# committed in plain text — consider rotating them and dropping the fallbacks.
API_ID = int(os.environ.get("TELEGRAM_API_ID", "28618843"))
API_HASH = os.environ.get("TELEGRAM_API_HASH", "476b6116881049c68d65f108ed0c1d6d")
BOT_USERNAME = os.environ.get("FUNSTAT_BOT_USERNAME", "@openaiw_bot")
# Reuses an existing Telethon session file (path is machine-specific).
SESSION_PATH = os.environ.get(
    "TELEGRAM_SESSION_PATH",
    "/Users/lucas/telegram_sessions/funstat_bot",
)

# Keywords to query; "type" is the category label used in the summary.
QUERIES = [
    {"type": "需求类", "keyword": "求推荐翻译"},
    {"type": "需求类", "keyword": "翻译软件推荐"},
    {"type": "痛点类", "keyword": "翻译不准"},
    {"type": "痛点类", "keyword": "翻译太慢"},
    {"type": "对比类", "keyword": "KT翻译"},
    {"type": "对比类", "keyword": "翻译软件对比"},
]
class FunstatAutoQuery:
    """Automated query client that talks to the Funstat bot over Telegram.

    Typical use: ``initialize()`` -> ``run_queries()`` -> ``save_results()``
    -> ``generate_summary()`` -> ``cleanup()``.
    """

    # re.ASCII restricts \w to [A-Za-z0-9_]. Without it, \w also matches CJK
    # characters, so "@user你好" would be captured as "user你好".
    _USERNAME_RE = re.compile(r'@(\w+)', re.ASCII)
    _GROUP_LINK_RE = re.compile(r't\.me/(\w+)', re.ASCII)

    def __init__(self):
        self.client = None   # TelegramClient, created in initialize()
        self.bot = None      # entity resolved from BOT_USERNAME
        self.results = []    # one dict per keyword that got a response

    async def initialize(self):
        """Create and start the Telegram client, then resolve the bot entity."""
        print("🚀 初始化 Telegram 客户端...")
        self.client = TelegramClient(SESSION_PATH, API_ID, API_HASH)
        await self.client.start()
        self.bot = await self.client.get_entity(BOT_USERNAME)
        print(f"✅ 已连接到: {BOT_USERNAME}")

    async def send_command_and_wait(self, command: str, timeout: int = 15) -> str:
        """Send ``command`` to the bot and poll for its first text reply.

        Args:
            command: Text to send to the bot.
            timeout: Number of one-second polling rounds before giving up.

        Returns:
            The text of the first incoming message newer than the sent
            command, or ``""`` if none arrived within ``timeout`` rounds.
        """
        print(f"\n📤 发送命令: {command}")
        # Use the id of the message we just sent as the lower bound. This
        # avoids a separate "latest message" fetch and cannot mistake an
        # older incoming message for the reply to this command.
        sent = await self.client.send_message(self.bot, command)
        for _ in range(timeout):
            await asyncio.sleep(1)
            new_messages = await self.client.get_messages(
                self.bot,
                limit=5,
                min_id=sent.id,
            )
            # Walk the batch in reverse so the earliest reply is returned
            # first; skip our own outgoing messages and non-text messages.
            for msg in reversed(new_messages):
                if msg.text and not msg.out:
                    print(f"✅ 收到响应 ({len(msg.text)} 字符)")
                    return msg.text
        print("⏰ 响应超时")
        return ""

    async def parse_search_results(self, text: str) -> list:
        """Extract ``@username`` mentions and ``t.me/`` links from ``text``.

        Blank lines and separator lines (starting with ``===`` or ``---``
        after stripping whitespace) are ignored.

        Returns:
            A list of dicts with keys ``raw_text`` (the original line),
            ``usernames`` and ``group_links``; only lines with at least one
            match are included.
        """
        results = []
        for line in text.split('\n'):
            stripped = line.strip()
            # Check the stripped line so indented separators are skipped too.
            if not stripped or stripped.startswith(('===', '---')):
                continue
            usernames = self._USERNAME_RE.findall(line)
            group_links = self._GROUP_LINK_RE.findall(line)
            if usernames or group_links:
                results.append({
                    'raw_text': line,
                    'usernames': usernames,
                    'group_links': group_links,
                })
        return results

    async def query_keyword(self, keyword: str, query_type: str):
        """Run one ``/text <keyword>`` search and record the parsed result.

        Nothing is appended to ``self.results`` when the bot does not answer.
        """
        print(f"\n{'='*60}")
        print(f"🔍 查询类型: {query_type}")
        print(f"🔍 关键词: {keyword}")
        print(f"{'='*60}")
        # The bot's /text command performs the search.
        command = f"/text {keyword}"
        response = await self.send_command_and_wait(command)
        if not response:
            print("❌ 未收到响应")
            return
        parsed = await self.parse_search_results(response)
        result = {
            'type': query_type,
            'keyword': keyword,
            'response': response,
            'parsed_count': len(parsed),
            'parsed_data': parsed,
            'timestamp': datetime.now().isoformat()
        }
        self.results.append(result)
        print(f"✅ 找到 {len(parsed)} 条相关数据")
        # Show up to three sample lines, truncated to 100 characters each.
        if parsed:
            print("\n📋 样例数据:")
            for i, item in enumerate(parsed[:3], 1):
                print(f" {i}. {item['raw_text'][:100]}")

    async def run_queries(self):
        """Run every entry of the module-level ``QUERIES`` list in order."""
        print("\n🚀 开始批量查询...")
        print(f"📊 查询计划: {len(QUERIES)} 个关键词")
        for i, query in enumerate(QUERIES, 1):
            print(f"\n⏳ 进度: {i}/{len(QUERIES)}")
            await self.query_keyword(query['keyword'], query['type'])
            # Pause two seconds between queries (not after the last one)
            # to avoid sending requests too quickly.
            if i < len(QUERIES):
                print("⏸️ 休息 2 秒...")
                await asyncio.sleep(2)

    def save_results(self, output_file: str = "funstat_query_results.json"):
        """Write ``self.results`` to ``output_file`` as UTF-8 JSON.

        Args:
            output_file: Destination path; defaults to the original
                hard-coded file name in the current working directory.
        """
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, ensure_ascii=False, indent=2)
        print(f"\n💾 结果已保存到: {output_file}")

    def generate_summary(self):
        """Print totals and a per-type breakdown of the collected results."""
        print("\n" + "="*60)
        print("📊 查询结果摘要")
        print("="*60)
        total_parsed = sum(r['parsed_count'] for r in self.results)
        print("\n✅ 查询完成:")
        print(f" - 查询关键词数: {len(self.results)}")
        print(f" - 发现数据总数: {total_parsed}")
        print("\n📋 按类型统计:")
        # Group by query type, preserving first-seen order.
        type_stats = {}
        for result in self.results:
            stats = type_stats.setdefault(
                result['type'], {'count': 0, 'keywords': []}
            )
            stats['count'] += result['parsed_count']
            stats['keywords'].append(result['keyword'])
        for query_type, stats in type_stats.items():
            print(f" - {query_type}: {stats['count']} 条数据")
            for kw in stats['keywords']:
                print(f"{kw}")
        print("\n📁 详细数据已保存到 JSON 文件")
        print("📝 接下来需要:")
        print(" 1. 分析 JSON 文件中的数据")
        print(" 2. 提取用户信息")
        print(" 3. 使用 funstat_user_info 验证用户价值")
        print(" 4. 生成最终客户清单")

    async def cleanup(self):
        """Disconnect the Telegram client if one was created."""
        if self.client:
            await self.client.disconnect()
            print("\n👋 客户端已断开")
async def main():
    """Entry coroutine: connect, run all queries, save, summarize.

    Any exception is reported with a traceback instead of propagating, and
    the client is always cleaned up afterwards.
    """
    banner = "="*60
    print(banner)
    print("🎯 007翻译客户自动获取系统")
    print(banner)
    runner = FunstatAutoQuery()
    try:
        await runner.initialize()
        await runner.run_queries()
        runner.save_results()
        runner.generate_summary()
    except Exception as exc:
        # Top-level boundary: report the failure, then fall through to cleanup.
        print(f"\n❌ 错误: {exc}")
        import traceback
        traceback.print_exc()
    finally:
        await runner.cleanup()


# Run the workflow only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())