Análise de bolhas em IA requer métricas específicas que capturam:
---
{
"total_funding": "valor total captado",
"valuation": "valuation atual",
"funding_rounds": "número de rodadas",
"investors_count": "quantidade de investidores",
"last_funding_date": "data última rodada"
}
Alternativa Gratuita: PitchBook (dados limitados), CB Insights reports
API Example:
import requests


def fetch_ai_company_funding(company_name, api_key):
    """Fetch funding metrics for one organization from the Crunchbase v4 API.

    Args:
        company_name: Crunchbase organization permalink/identifier.
        api_key: Crunchbase user key, sent as the X-cb-user-key header.

    Returns:
        Dict with total funding (USD), last funding type and number of
        funding rounds; fields absent from the response come back as None.
    """
    endpoint = f"https://api.crunchbase.com/v4/entities/organizations/{company_name}"
    auth_headers = {"X-cb-user-key": api_key}
    payload = requests.get(endpoint, headers=auth_headers).json()
    wanted = ("total_funding_usd", "last_funding_type", "num_funding_rounds")
    return {field: payload.get(field) for field in wanted}
---
---
import yfinance as yf


def track_ai_stocks():
    """Snapshot valuation metrics for a fixed basket of AI-exposed tickers.

    Returns:
        Dict keyed by ticker symbol with market cap, trailing P/E,
        revenue growth and trailing price/sales, as reported by yfinance
        (missing fields are None).
    """
    watchlist = ['NVDA', 'MSFT', 'GOOGL', 'AI', 'PLTR']
    snapshot = {}
    for symbol in watchlist:
        info = yf.Ticker(symbol).info
        snapshot[symbol] = {
            "market_cap": info.get("marketCap"),
            "pe_ratio": info.get("trailingPE"),
            "revenue_growth": info.get("revenueGrowth"),
            "price_to_sales": info.get("priceToSalesTrailing12Months"),
        }
    return snapshot
---
import requests


def fetch_huggingface_metrics(model_id="gpt2"):
    """Return download/like/tag metadata for one Hugging Face model."""
    payload = requests.get(f"https://huggingface.co/api/models/{model_id}").json()
    return {
        "downloads": payload.get("downloads", 0),
        "likes": payload.get("likes", 0),
        "tags": payload.get("tags", []),
        "created_at": payload.get("createdAt"),
    }


def get_trending_ai_models(limit=10):
    """List the currently trending models, as reported by the HF API."""
    endpoint = f"https://huggingface.co/api/models?sort=trending&limit={limit}"
    return requests.get(endpoint).json()
Métricas chave:
---
# GitHub repositories tracked as proxies for AI-ecosystem activity.
# NOTE(review): several owner/repo pairs look unverifiable from here —
# "openai/gpt-3" and "midjourney/midjourney" have no obvious public code
# repos, and the Anthropic SDK is published under the "anthropics" org,
# not "anthropic-ai" — confirm against GitHub before fetching.
AI_REPOS = {
# Language models
"openai/gpt-3": {"owner": "openai", "repo": "gpt-3"},
"meta-llama": {"owner": "meta-llama", "repo": "llama"},
"anthropic/anthropic-sdk-python": {"owner": "anthropic-ai", "repo": "anthropic-sdk-python"},
# Frameworks
"langchain": {"owner": "langchain-ai", "repo": "langchain"},
"llamaindex": {"owner": "run-llama", "repo": "llama_index"},
"autogen": {"owner": "microsoft", "repo": "autogen"},
# Image generation
"stable-diffusion": {"owner": "CompVis", "repo": "stable-diffusion"},
"midjourney": {"owner": "midjourney", "repo": "midjourney"},
# Infrastructure
"pytorch": {"owner": "pytorch", "repo": "pytorch"},
"tensorflow": {"owner": "tensorflow", "repo": "tensorflow"},
"transformers": {"owner": "huggingface", "repo": "transformers"}
}
def track_ai_github_activity(repos_dict):
    """Collect activity metrics for every repository in *repos_dict*.

    Args:
        repos_dict: mapping of display name -> {"owner": ..., "repo": ...}.

    Returns:
        Dict keyed by display name with stars, forks, open issues,
        contributor count and star velocity.

    Relies on external helpers ``fetch_github_metrics`` and
    ``calculate_star_velocity`` defined elsewhere in the project.
    """
    metrics = {}
    for name, config in repos_dict.items():
        snapshot = fetch_github_metrics(config["owner"], config["repo"])
        metrics[name] = {
            "stars": snapshot["stars"],
            "forks": snapshot["forks"],
            "open_issues": snapshot["open_issues"],
            "contributors": snapshot.get("contributors_count", 0),
            "star_velocity": calculate_star_velocity(config),
        }
    return metrics
---
# PyPI distributions whose download counts proxy Python-side AI adoption.
# NOTE(review): "stable-diffusion" is presumably intended as a PyPI package
# name — verify; Stable Diffusion tooling commonly ships as "diffusers".
AI_PACKAGES = [
"openai", # OpenAI API
"anthropic", # Claude API
"langchain", # LLM framework
"llama-index", # RAG framework
"transformers", # HuggingFace
"torch", # PyTorch
"tensorflow", # TensorFlow
"stable-diffusion", # Image generation
"autogen-agentchat",# Multi-agent
"chromadb", # Vector DB
"pinecone-client", # Vector DB
"sentence-transformers" # Embeddings
]
async def fetch_pypi_ai_metrics(package):
    """Fetch recent download counts for *package* from pypistats.org.

    Returns:
        Dict with the package name plus last-day/-week/-month downloads.

    Raises:
        KeyError: if the API response lacks the expected "data" payload.

    NOTE: relies on ``httpx`` being importable in this module's scope.
    """
    endpoint = f"https://pypistats.org/api/packages/{package}/recent"
    async with httpx.AsyncClient() as client:
        reply = await client.get(endpoint)
        recent = reply.json()["data"]
    return {
        "package": package,
        "downloads_last_day": recent["last_day"],
        "downloads_last_week": recent["last_week"],
        "downloads_last_month": recent["last_month"],
    }
---
// npm packages whose download counts proxy JavaScript-side AI adoption.
const AI_NPM_PACKAGES = [
  "openai", // OpenAI SDK
  "@anthropic-ai/sdk", // Claude SDK
  "langchain", // LangChain JS
  "llamaindex", // LlamaIndex JS
  "@tensorflow/tfjs", // TensorFlow JS
  "replicate", // Replicate API
  "ai", // Vercel AI SDK
  "@huggingface/inference" // HF Inference
];

/**
 * Fetch the last-month download count for one npm package.
 *
 * BUG FIX: the URL must be a template literal — the original passed a bare
 * `https://...${packageName}` expression without backticks, which is a
 * syntax error.
 *
 * @param {string} packageName - npm package name (scoped names allowed).
 * @returns {Promise<{package: string, downloads: number}>}
 */
async function fetchNPMMetrics(packageName) {
  const response = await fetch(
    `https://api.npmjs.org/downloads/point/last-month/${packageName}`
  );
  const data = await response.json();
  return {
    package: packageName,
    downloads: data.downloads
  };
}
---
# Subreddits polled for AI community sentiment. Member counts are
# point-in-time estimates kept as context only; the code never reads them.
AI_SUBREDDITS = [
"MachineLearning", # 2.9M members
"artificial", # 1.8M members
"OpenAI", # 1.2M members
"LocalLLaMA", # 500K members
"StableDiffusion", # 800K members
"ArtificialInteligence", # 1.5M members (subreddit name is spelled this way)
"singularity", # 350K members
"agi", # 100K members
"ChatGPT", # 5.2M members
"Bard", # 150K members
"ClaudeAI", # 50K members
"LangChain", # 80K members
"AITooling" # 30K members
]
async def analyze_ai_reddit_sentiment():
    """Aggregate activity and sentiment metrics per tracked AI subreddit.

    Returns:
        Dict keyed by subreddit name with member count, average post score,
        post volume, sentiment and extracted top keywords.

    Relies on external helpers ``fetch_reddit_sentiment``,
    ``get_subreddit_members`` and ``extract_keywords``.
    """
    results = {}
    for subreddit in AI_SUBREDDITS:
        stats = await fetch_reddit_sentiment(subreddit, limit=100)
        member_count = await get_subreddit_members(subreddit)
        results[subreddit] = {
            "members": member_count,
            "avg_score": stats["avg_score"],
            "posts_per_day": stats["total_posts"],
            "sentiment": stats["sentiment"],
            "top_keywords": extract_keywords(stats["posts"]),
        }
    return results
---
# Hashtags and public figures tracked on Twitter/X to gauge AI hype volume.
AI_KEYWORDS = [
"#AI", "#ArtificialIntelligence", "#MachineLearning",
"#ChatGPT", "#GPT4", "#GPT5", "#OpenAI",
"#Claude", "#Anthropic", "#AGI",
"#StableDiffusion", "#Midjourney", "#DALL-E",
"#LangChain", "#LLM", "#GenerativeAI",
"Sam Altman", "Dario Amodei", "Demis Hassabis"
]
Requires Twitter API v2 (Academic or Enterprise)
def track_ai_twitter_mentions(bearer_token):
    """Count recent tweets and total impressions per tracked AI keyword.

    Args:
        bearer_token: Twitter/X API v2 bearer token (Academic or
            Enterprise access is required for meaningful volumes).

    Returns:
        Dict keyed by keyword with tweet_count and total_impressions.
    """
    import tweepy

    client = tweepy.Client(bearer_token=bearer_token)
    metrics = {}
    for keyword in AI_KEYWORDS:
        tweets = client.search_recent_tweets(
            query=keyword,
            max_results=100,
            tweet_fields=['created_at', 'public_metrics'],
        )
        # tweets.data is None when nothing matched; treat as empty batch.
        batch = tweets.data or []
        metrics[keyword] = {
            "tweet_count": len(batch),
            "total_impressions": sum(
                t.public_metrics['impression_count'] for t in batch
            ),
        }
    return metrics
---
import pytrends
from pytrends.request import TrendReq
# Search terms compared on Google Trends. The fetcher batches these five
# at a time, since Trends compares at most 5 terms per request.
AI_TRENDS_KEYWORDS = [
"ChatGPT",
"Claude AI",
"GPT-4",
"Artificial Intelligence",
"Machine Learning",
"AGI",
"Generative AI",
"Stable Diffusion",
"Midjourney",
"OpenAI",
"Anthropic"
]
def fetch_ai_google_trends(keywords, timeframe='today 12-m'):
    """Pull Google Trends interest and related queries for *keywords*.

    Google Trends compares at most 5 terms per request, so the keyword
    list is processed in batches of five.

    Args:
        keywords: list of search terms.
        timeframe: pytrends timeframe string (default: trailing 12 months).

    Returns:
        Dict keyed by keyword with interest-over-time series plus rising
        and top related queries (empty when a keyword is missing from the
        API response).
    """
    pytrend = TrendReq(hl='en-US', tz=360)
    trends_data = {}
    batch_size = 5
    for start in range(0, len(keywords), batch_size):
        batch = keywords[start:start + batch_size]
        pytrend.build_payload(batch, timeframe=timeframe)
        interest = pytrend.interest_over_time()
        related = pytrend.related_queries()
        for kw in batch:
            trends_data[kw] = {
                "interest_over_time": interest[kw].tolist() if kw in interest else [],
                "rising_queries": related[kw]['rising'] if kw in related else [],
                "top_queries": related[kw]['top'] if kw in related else [],
            }
    return trends_data
---
import requests
from textblob import TextBlob


def fetch_ai_news(api_key, query="artificial intelligence"):
    """Fetch recent AI news from NewsAPI and score headline sentiment.

    Args:
        api_key: NewsAPI key.
        query: search query (default "artificial intelligence").

    Returns:
        Dict with article_count, avg_sentiment (TextBlob polarity,
        0.0 when no articles), positive_ratio and the raw articles list.

    BUG FIXES vs. the original:
      - an empty result set no longer raises ZeroDivisionError;
      - a null article description no longer injects the literal string
        "None" into the sentiment text.
    """
    url = "https://newsapi.org/v2/everything"
    params = {
        "q": query,
        "apiKey": api_key,
        "language": "en",
        "sortBy": "publishedAt",
        "pageSize": 100
    }
    response = requests.get(url, params=params)
    articles = response.json()["articles"]
    # Score each headline + description with TextBlob polarity [-1, 1].
    sentiments = []
    for article in articles:
        text = f"{article['title']} {article['description'] or ''}"
        sentiments.append(TextBlob(text).sentiment.polarity)
    count = len(sentiments)
    return {
        "article_count": len(articles),
        "avg_sentiment": sum(sentiments) / count if count else 0.0,
        "positive_ratio": (
            len([s for s in sentiments if s > 0]) / count if count else 0.0
        ),
        "articles": articles,
    }
---
# arXiv subject categories scanned when measuring AI research volume.
ARXIV_CATEGORIES = [
"cs.AI", # Artificial Intelligence
"cs.LG", # Machine Learning
"cs.CL", # Computation and Language (NLP)
"cs.CV", # Computer Vision
"cs.NE", # Neural and Evolutionary Computing
"stat.ML" # Machine Learning (Statistics)
]
import arxiv


def track_ai_papers(category="cs.AI", max_results=100):
    """Collect the most recent arXiv submissions for one category.

    Args:
        category: arXiv category code, e.g. "cs.AI".
        max_results: cap on the number of papers fetched.

    Returns:
        Dict with total count, publication velocity, top authors and the
        per-paper records.

    Relies on external helpers ``get_citation_count``,
    ``calculate_velocity`` and ``get_top_authors``.
    """
    search = arxiv.Search(
        query=f"cat:{category}",
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    papers = [
        {
            "title": result.title,
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "updated": result.updated,
            "summary": result.summary,
            "pdf_url": result.pdf_url,
            "citation_count": get_citation_count(result.entry_id),
        }
        for result in search.results()
    ]
    return {
        "total_papers": len(papers),
        "papers_per_day": calculate_velocity(papers),
        "top_authors": get_top_authors(papers),
        "papers": papers,
    }
def detect_breakthrough_vs_incremental(papers):
    """Score each paper with a crude breakthrough-vs-incremental heuristic.

    Signals:
      - title contains a hype word ("novel", "breakthrough", "first",
        "new") — matched as a substring, so e.g. "renewal" also matches;
      - more than 10 authors, taken as a large collaboration.

    Args:
        papers: iterable of dicts with at least 'title' and 'authors'.

    Returns:
        List of scores (0 to 0.5), one per paper, in input order.
    """
    hype_words = ('novel', 'breakthrough', 'first', 'new')
    scores = []
    for paper in papers:
        title = paper['title'].lower()
        score = 0
        if any(word in title for word in hype_words):
            score += 0.3
        if len(paper['authors']) > 10:  # large collaboration
            score += 0.2
        # ... further criteria could be added here (e.g. citation speed)
        scores.append(score)
    return scores
---
import requests


def get_paper_influence(paper_id):
    """Fetch citation-based influence metrics from Semantic Scholar.

    Args:
        paper_id: Semantic Scholar paper identifier.

    Returns:
        Dict with citation_count, influential_citations and their ratio
        (division guarded so zero citations yields 0.0, not an error).
    """
    endpoint = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}"
    query = {
        "fields": "citationCount,influentialCitationCount,citations,references"
    }
    payload = requests.get(endpoint, params=query).json()
    citations = payload.get("citationCount", 0)
    influential = payload.get("influentialCitationCount", 0)
    return {
        "citation_count": citations,
        "influential_citations": influential,
        "influence_ratio": influential / max(citations, 1),
    }
def track_ai_research_velocity():
    """Measure how fast publication volume grows for hot AI keywords.

    Returns:
        Dict keyed by keyword with total papers, growth rate and
        acceleration of the publication timeline since 2020.

    Relies on external helpers ``search_semantic_scholar``,
    ``group_by_period``, ``calculate_growth_rate`` and
    ``calculate_acceleration``.
    """
    keywords = ["GPT", "transformer", "diffusion model", "AGI", "LLM"]
    velocity_metrics = {}
    for keyword in keywords:
        papers = search_semantic_scholar(keyword, year_start=2020)
        timeline = group_by_period(papers)  # bucket papers by year/month
        velocity_metrics[keyword] = {
            "total_papers": len(papers),
            "growth_rate": calculate_growth_rate(timeline),
            "acceleration": calculate_acceleration(timeline),
        }
    return velocity_metrics
---
def fetch_sota_leaderboards():
    """Track SOTA benchmark progress and flag plateau vs. breakthrough.

    Papers With Code has no public API, so ``get_benchmark_history`` is
    expected to scrape pages or read cached GitHub datasets.

    Returns:
        Dict mapping benchmark name -> {"is_plateau": ...,
        "recent_breakthrough": ...} as reported by the detector helpers.

    BUG FIX: the original computed both signals per benchmark but
    discarded them and implicitly returned None.
    """
    benchmarks = [
        "ImageNet",
        "GLUE",
        "SuperGLUE",
        "SQuAD",
        "MMLU",
        "HumanEval",
        "Anthropic's Eval Suite",
    ]
    results = {}
    for benchmark in benchmarks:
        scores_history = get_benchmark_history(benchmark)
        results[benchmark] = {
            # Plateau may signal the end of the current paradigm.
            "is_plateau": detect_plateau(scores_history),
            # A recent jump suggests a fresh breakthrough instead.
            "recent_breakthrough": detect_breakthrough(scores_history),
        }
    return results
---
# LLM API providers and the public pages usable as adoption proxies.
# Entries without pricing/status URLs are tracked via SDK downloads only.
API_PROVIDERS = {
"openai": {
"models": ["gpt-4", "gpt-3.5-turbo", "dall-e-3"],
"pricing_endpoint": "https://openai.com/pricing",
"status": "https://status.openai.com"
},
"anthropic": {
"models": ["claude-3-opus", "claude-3-sonnet", "claude-3-haiku"],
"pricing_endpoint": "https://www.anthropic.com/pricing"
},
"cohere": {
"models": ["command", "embed"],
},
"replicate": {
"models": ["stable-diffusion", "llama-2"],
}
}
Proxies para medir adoção:
1. Status page incidents (alta demanda = muitos incidents)
2. Pricing changes (reduções = tentando volume)
3. Release velocity de novos modelos
4. SDK downloads (PyPI, NPM)
def estimate_api_usage_from_sdk_downloads():
    """Roughly estimate API traffic per provider from SDK download counts.

    Heuristic: each monthly SDK download stands in for ~50 requests/month
    (midpoint of a conservative 10-100 range).

    Returns:
        Dict keyed by provider with raw sdk_downloads and the derived
        estimated_monthly_requests.

    Relies on the external helper ``fetch_pypi_downloads``.
    """
    sdk_packages = {
        "openai": "openai",  # PyPI
        "anthropic": "anthropic",
        "cohere": "cohere",
        "replicate": "replicate",
    }
    usage_estimates = {}
    for provider, package in sdk_packages.items():
        downloads = fetch_pypi_downloads(package, period="last-month")
        usage_estimates[provider] = {
            "sdk_downloads": downloads,
            "estimated_monthly_requests": downloads * 50,
        }
    return usage_estimates
---
def track_ai_product_launches():
    """Measure saturation and commoditization of new AI product launches.

    Pulls the last 30 days of Product Hunt launches (the API requires
    OAuth) for a few categories, keeps AI-flavored products, and derives:
      - launches per day (saturation),
      - the share of product groups containing > 5 near-identical entries
        (commoditization).

    Returns:
        Dict with new_products_per_day, commoditization_index and
        top_categories.

    Relies on external helpers ``fetch_product_hunt``,
    ``group_similar_products`` and ``get_top_categories``.

    BUG FIX: guards the commoditization ratio against division by zero
    when no similar-product groups are found.
    """
    categories = ["AI", "Machine Learning", "Productivity", "Developer Tools"]
    ai_markers = ('ai', 'gpt', 'llm', 'chatbot', 'ml')
    ai_products = []
    for category in categories:
        products = fetch_product_hunt(category, days=30)
        # Keep only products whose tagline mentions an AI marker.
        ai_products.extend(
            p for p in products
            if any(marker in p['tagline'].lower() for marker in ai_markers)
        )
    saturation_score = len(ai_products) / 30  # products per day
    similar_products = group_similar_products(ai_products)
    commoditization = len([g for g in similar_products if len(g) > 5])
    group_count = len(similar_products)
    return {
        "new_products_per_day": saturation_score,
        "commoditization_index": (
            commoditization / group_count if group_count else 0.0
        ),
        "top_categories": get_top_categories(ai_products),
    }
---
def track_gpu_pricing():
    """GPU instance pricing as a proxy for AI compute demand.

    Signals to watch once pricing data is wired in:
      - price increases      -> high demand
      - availability issues  -> scarcity
      - new regions          -> expansion

    Cloud pricing pages must be scraped (or billing APIs used); until
    then each provider entry carries its tracked instance types and an
    empty price history for the collector to fill.

    Returns:
        Dict keyed by provider with "instances" (instance type -> GPU
        model) and an empty "price_history" list.

    BUG FIX: the original returned the undefined name
    ``gpu_pricing_trends`` and raised NameError on every call.
    """
    gpu_providers = {
        "aws": {
            "p4d.24xlarge": "A100 80GB",
            "p5.48xlarge": "H100",
        },
        "gcp": {
            "a2-ultragpu-8g": "A100 80GB",
        },
        "azure": {
            "Standard_ND96amsr_A100_v4": "A100 80GB",
        },
    }
    gpu_pricing_trends = {
        provider: {"instances": instances, "price_history": []}
        for provider, instances in gpu_providers.items()
    }
    return gpu_pricing_trends
---
def track_nvidia_as_ai_proxy():
    """Use NVIDIA's market data as a thermometer for the AI market.

    Combines live market data (price, market cap, P/E) with project
    helpers for data-center revenue and "AI" mention counts in earnings
    calls, then derives a P/E-based bubble score.

    Returns:
        Dict with the raw metrics plus pe_bubble_score (relative excess
        of current P/E over the historical average).

    Relies on yfinance plus external helpers ``get_datacenter_revenue``
    and ``count_ai_mentions``.
    """
    nvda = yf.Ticker("NVDA")
    metrics = {
        "stock_price": nvda.info['currentPrice'],
        "market_cap": nvda.info['marketCap'],
        "pe_ratio": nvda.info['trailingPE'],
        "datacenter_revenue": get_datacenter_revenue(nvda),
        "ai_mentions_in_earnings": count_ai_mentions(nvda),
    }
    # Bubble signal: current P/E far above the historical average.
    # NOTE(review): 'averagePE' does not look like a standard yfinance
    # info key — verify; the fallback of 30 applies whenever it's absent.
    historical_pe = nvda.info.get('averagePE', 30)
    current_pe = metrics['pe_ratio']
    pe_bubble_score = (current_pe - historical_pe) / historical_pe
    return {**metrics, "pe_bubble_score": pe_bubble_score}
---
def track_ai_job_postings():
    """Use AI job postings as a proxy for real (non-hype) demand.

    LinkedIn's API needs a partnership; scraping or the free Adzuna API
    are the practical alternatives for ``search_jobs``.

    Bubble signals to evaluate downstream:
      - an explosion of "Prompt Engineer" roles = hype;
      - many openings but few qualified candidates = unsustainable.

    Returns:
        Dict keyed by job title with posting count, average salary, top
        hiring companies and month-over-month growth.

    Relies on external helpers ``search_jobs``, ``calculate_avg_salary``,
    ``get_top_hiring_companies`` and ``compare_to_previous_month``.
    """
    job_titles = [
        "Machine Learning Engineer",
        "AI Engineer",
        "LLM Engineer",
        "Prompt Engineer",
        "AI Researcher",
        "ML Ops Engineer",
    ]
    job_metrics = {}
    for title in job_titles:
        jobs = search_jobs(title, location="United States")
        job_metrics[title] = {
            "total_postings": len(jobs),
            "avg_salary": calculate_avg_salary(jobs),
            "top_companies": get_top_hiring_companies(jobs),
            "growth_rate": compare_to_previous_month(title),
        }
    return job_metrics
---
class AIBubbleCalculator(BubbleCalculator):
    """Bubble-index calculator specialized for the AI/AGI sector.

    Each component score is a weighted, capped mix of its data sources;
    sources absent from the input dict simply contribute nothing.
    """

    @staticmethod
    def calculate_adoption(data: Dict) -> float:
        """Real adoption in [0, 1]: SDK usage 30%, GitHub 25%, PyPI 25%, HF 20%."""
        total = 0.0
        # For AI products, API/SDK usage matters more than GitHub stars.
        if "api_usage" in data:
            total += 0.30 * min(data["api_usage"]["sdk_downloads"] / 1000000, 1)
        if "github" in data:
            github = data["github"]
            total += 0.15 * min(github["stars"] / 100000, 1)
            total += 0.10 * min(github["forks"] / 20000, 1)
        if "pypi" in data:
            total += 0.25 * min(data["pypi"]["downloads"] / 50000000, 1)
        # HuggingFace model downloads are an AI-specific adoption signal.
        if "huggingface" in data:
            total += 0.20 * min(data["huggingface"]["downloads"] / 10000000, 1)
        return min(total, 1.0)

    @staticmethod
    def calculate_hype(data: Dict) -> float:
        """Hype in [0, 1]: Twitter 30%, Trends 25%, news 25%, launches 20%."""
        total = 0.0
        # AI discourse is heavily buzzword-driven, so mentions weigh most.
        if "twitter" in data:
            total += 0.30 * min(data["twitter"]["mention_velocity"] / 100000, 1)
        if "trends" in data:
            total += 0.25 * data["trends"]["interest_score"]
        if "news" in data:
            total += 0.25 * data["news"]["positive_ratio"]
        if "products" in data:
            # Many near-identical launches = commoditization = possible peak.
            total += 0.20 * min(data["products"]["launches_per_week"] / 50, 1)
        return min(total, 1.0)

    @staticmethod
    def calculate_investment(data: Dict) -> float:
        """Investment heat in [0, 1]: funding 40%, valuations 30%, GPU stocks 30%."""
        total = 0.0
        if "funding" in data:
            # Normalized against a $10B reference.
            total += 0.40 * min(data["funding"]["total_raised"] / 10000000000, 1)
        if "valuation" in data:
            valuation = data["valuation"]
            annual_revenue = valuation.get("annual_revenue", 1)
            # Revenue multiples above 50x are treated as bubble territory.
            revenue_multiple = valuation["current"] / annual_revenue
            total += 0.30 * min(revenue_multiple / 50, 1)
        if "gpu_market" in data:
            gpu = data["gpu_market"]
            # NVIDIA P/E above 2x its historical average = overheating.
            pe_score = min((gpu["pe_ratio"] / gpu["historical_avg_pe"]) / 2, 1)
            total += 0.30 * pe_score
        return min(total, 1.0)

    @staticmethod
    def calculate_research_momentum(data: Dict) -> float:
        """Research momentum — an AI-specific signal.

        Mixes publication velocity (40%), breakthrough share (30%) and
        whether output is still accelerating (30%). Returns the neutral
        value 0.5 when no arXiv data is available.
        """
        if "arxiv" not in data:
            return 0.5
        arxiv = data["arxiv"]
        velocity_part = 0.4 * min(arxiv["papers_per_day"] / 50, 1)
        breakthrough_part = 0.3 * (
            arxiv["breakthrough_papers"] / max(arxiv["total_papers"], 1)
        )
        accelerating = arxiv["growth_rate"] > 0.1
        trend_part = 0.3 * (1.0 if accelerating else 0.3)
        return velocity_part + breakthrough_part + trend_part

    @classmethod
    def calculate_bubble_index(cls, metrics: Dict) -> float:
        """Combine component scores into a 0-1 bubble index (AI-specific weights)."""
        adoption = metrics["adoption"]
        hype = metrics["hype"]
        investment = metrics["investment"]
        research = metrics.get("research_momentum", 0.5)
        # Hype running ahead of adoption is penalized 4x harder than the
        # reverse — divergence is the key signal in an over-hyped field.
        gap_weight = 2.0 if hype > adoption else 0.5
        divergence = abs(hype - adoption) * gap_weight
        index = (
            0.30 * divergence              # divergence is critical
            + 0.25 * (investment ** 1.5)   # investment can be irrational
            + 0.20 * (1 - research)        # research plateau = end of cycle
            + 0.15 * hype                  # absolute hype still matters
            + 0.10 * (1 - adoption)        # low adoption + high hype = danger
        )
        return min(index * 1.5, 1.0)
---
**Composição do índice:**

1. **Adoção Real (40%)** — API SDK downloads (PyPI + NPM); HuggingFace model downloads; GitHub stars/forks; enterprise adoption surveys
2. **Hype (30%)** — Twitter mentions velocity; Google Trends spike; news article sentiment; product launches per week
3. **Investimento (20%)** — VC funding velocity; valuation multiples; NVIDIA P/E ratio; GPU pricing trends
4. **Pesquisa (10%)** — arXiv papers per day; breakthrough vs incremental ratio; plateau detection em benchmarks; citation velocity

---
def detect_ai_bubble_red_flags(data):
    """Scan aggregated metrics for classic AI-bubble warning signs.

    Args:
        data: aggregated metrics dict; must contain the "products",
            "research", "hype", "adoption", "investment", "jobs" and
            "infrastructure" entries referenced below.

    Returns:
        List of human-readable flag strings, in a fixed order from
        product saturation through infrastructure strain; empty when
        nothing triggers.
    """
    red_flags = []
    # 1. Extreme commoditization: a flood of near-identical products.
    if data["products"]["similar_products_per_week"] > 20:
        red_flags.append("CRITICAL: Commoditização - 20+ produtos similares/semana")
    # 2. Research stuck on incremental work (benchmark plateau).
    if data["research"]["breakthrough_ratio"] < 0.05:
        red_flags.append("WARNING: Plateau em pesquisa - 95% incremental")
    # 3. Hype running far ahead of measured adoption.
    if data["hype"] > data["adoption"] * 2:
        red_flags.append("CRITICAL: Hype 2x maior que adoção real")
    # 4. Irrational valuations.
    if data["investment"]["avg_revenue_multiple"] > 100:
        red_flags.append("CRITICAL: Revenue multiples > 100x")
    # 5. "Prompt Engineer" mania outpacing core ML hiring.
    if data["jobs"]["prompt_engineer_postings"] > data["jobs"]["ml_engineer_postings"]:
        red_flags.append("WARNING: Mais 'Prompt Engineers' que ML Engineers")
    # 6. Prolonged GPU shortage: demand the supply chain cannot sustain.
    if data["infrastructure"]["gpu_availability"] < 0.3:
        red_flags.append("WARNING: GPU shortage - demanda insustentável")
    return red_flags
---
✅ HuggingFace API ✅ arXiv API ✅ Semantic Scholar ✅ PyPI Stats ✅ NPM Registry ✅ GitHub API (com token) ✅ Reddit API ✅ Google Trends (pytrends)
💰 Crunchbase ($) 💰 Twitter API v2 ($$) 💰 News API ($) 💰 LinkedIn Jobs ($$)
🔍 Papers With Code 🔍 Product Hunt 🔍 NVIDIA Investor Relations 🔍 Cloud provider pricing pages --- Quer que eu implemente algum desses fetchers específicos ou crie um dashboard especializado para IA/AGI?