Skip to main content

Content Analysis Example

Learn how to use Meibel AI to analyze content, extract insights, and understand patterns in your data.

Overview

This example demonstrates:
  • Sentiment analysis
  • Topic extraction
  • Content categorization
  • Trend identification
  • Summary generation

Setup

from meibelai import Meibelai
import os

# Initialize the Meibel AI client. The API key header is read from the
# environment so no credential is hard-coded in the example.
client = Meibelai(
    api_key_header=os.getenv("MEIBELAI_API_KEY_HEADER")
)

# Create datasource for content.
# `content_db` is the shared datasource that every later example ingests
# documents into and queries against.
content_db = client.datasources.create(
    name="Content Analytics",
    description="Content for analysis and insights"
)

Sentiment Analysis

def analyze_sentiment(text: str) -> dict:
    """Classify the sentiment of *text* via the RAG chat endpoint.

    Returns the raw model message content, which the system prompt asks to
    be JSON carrying ``sentiment`` (positive/negative/neutral) and a
    ``score`` in [0, 1].
    """
    # Instruct the model once via the system role; the text under analysis
    # travels as the user message.
    instruction = (
        "Analyze the sentiment of the text. Return JSON with sentiment "
        "(positive/negative/neutral) and score (0-1)."
    )
    conversation = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": text},
    ]
    # Low temperature keeps the classification deterministic; JSON format
    # makes the reply machine-parseable.
    controls = {"response_format": "json", "temperature": 0.1}

    reply = client.rag.chat(messages=conversation, execution_control=controls)
    return reply.choices[0].message.content

# Example usage
# Example usage: three sample reviews spanning positive, neutral, and
# negative tone.
reviews = [
    "This product exceeded my expectations! Highly recommend.",
    "The service was okay, nothing special.",
    "Terrible experience, would not buy again."
]

# Print the model's sentiment verdict for each review.
for review in reviews:
    sentiment = analyze_sentiment(review)
    print(f"Review: {review}")
    print(f"Sentiment: {sentiment}\n")

Topic Extraction

def extract_topics(documents: list) -> list:
    """Ingest *documents* into the content datasource, then ask the model
    for the main topics across all of them.

    Args:
        documents: Plain-text documents to analyze.

    Returns:
        The raw model message content — the system prompt asks for a JSON
        list of topic name / relevance score entries.
    """
    # Ingest each document so the RAG query below can retrieve it.
    # (The original collected the created element ids into a list that was
    # never used; that dead local has been removed.)
    for i, doc in enumerate(documents):
        client.dataelements.create(
            datasource_id=content_db.id,
            name=f"Document {i+1}",
            content=doc,
            metadata={"type": "article"}
        )

    # Query across the whole datasource for cross-document topics.
    response = client.rag.chat(
        messages=[
            {
                "role": "system",
                "content": "Identify the main topics across all documents. Return as a JSON list with topic name and relevance score."
            },
            {"role": "user", "content": "What are the main topics in these documents?"}
        ],
        datasource_ids=[content_db.id],
        execution_control={
            "response_format": "json"
        }
    )

    return response.choices[0].message.content

# Example
# Example: short article snippets. extract_topics() ingests them into the
# shared datasource and asks the model for cross-document topics.
articles = [
    "AI is transforming healthcare with predictive diagnostics...",
    "Machine learning models are improving financial forecasting...",
    "Natural language processing enables better customer service..."
]

topics = extract_topics(articles)
print(f"Main topics: {topics}")

Content Categorization

class ContentCategorizer:
    """Assigns free-form content to one of a fixed set of categories."""

    def __init__(self, categories: list):
        # The closed set of category labels offered to the model.
        self.categories = categories

    def categorize(self, content: str) -> dict:
        """Return the model's JSON verdict (category + confidence) for *content*."""
        label_list = ', '.join(self.categories)
        system_message = {
            "role": "system",
            "content": f"Categorize the content into one of these categories: {label_list}. Return JSON with category and confidence.",
        }
        # JSON output + low temperature give stable, parseable labels.
        reply = client.rag.chat(
            messages=[system_message, {"role": "user", "content": content}],
            execution_control={"response_format": "json", "temperature": 0.1},
        )
        return reply.choices[0].message.content

# Setup categorizer
# Setup categorizer with the allowed category labels.
categorizer = ContentCategorizer([
    "Technology", "Business", "Health", "Science", "Entertainment"
])

# Categorize a single piece of content and show the model's verdict.
content = "New study shows that regular exercise improves cognitive function"
category = categorizer.categorize(content)
print(f"Category: {category}")

Trend Analysis

def analyze_trends(time_series_content: list) -> dict:
    """Ingest dated content items and ask the model for trends over time.

    Each item is expected to carry ``date`` and ``content`` keys. Returns a
    dict with the model's ``analysis`` text and its ``confidence`` score.
    """
    # Load every dated item into the shared datasource, tagged with
    # type="time_series" so the query below can filter to just these.
    for entry in time_series_content:
        client.dataelements.create(
            datasource_id=content_db.id,
            name=f"Content - {entry['date']}",
            content=entry['content'],
            metadata={"date": entry['date'], "type": "time_series"},
        )

    # Query with a metadata filter so only the time-series entries are
    # retrieved; tracing is enabled for observability.
    result = client.rag.chat(
        messages=[
            {
                "role": "system",
                "content": "Analyze trends over time in the content. Identify patterns, changes, and emerging themes."
            },
            {"role": "user", "content": "What trends do you see in this content over time?"},
        ],
        datasource_ids=[content_db.id],
        execution_control={
            "metadata_filter": {"type": "time_series"},
            "enable_tracing": True,
        },
    )

    return {
        "analysis": result.choices[0].message.content,
        "confidence": result.confidence_score,
    }

# Example time series
# Example time series: monthly snapshots of AI-related content, each with
# the date/content keys analyze_trends() expects.
content_series = [
    {"date": "2024-01", "content": "Focus on AI safety and ethics..."},
    {"date": "2024-02", "content": "Increased adoption of AI in healthcare..."},
    {"date": "2024-03", "content": "Regulatory frameworks for AI emerging..."}
]

trends = analyze_trends(content_series)
print(f"Trend Analysis: {trends}")

Comparative Analysis

def compare_content(content_a: str, content_b: str, aspects: list) -> dict:
    """Ask the model to compare two pieces of content along *aspects*.

    Returns the raw model message content describing the comparison.
    """
    aspect_list = ', '.join(aspects)
    # Both pieces of content are packed into a single user message.
    paired = f"Content A: {content_a}\n\nContent B: {content_b}"

    # Slightly raised temperature allows some latitude in phrasing, while
    # max_tokens bounds the length of the comparison.
    reply = client.rag.chat(
        messages=[
            {
                "role": "system",
                "content": f"Compare the two pieces of content across these aspects: {aspect_list}"
            },
            {"role": "user", "content": paired},
        ],
        execution_control={"temperature": 0.2, "max_tokens": 500},
    )
    return reply.choices[0].message.content

# Compare two products
# Compare two products: one premium, one budget, across three aspects.
product_a = "Premium smartphone with 5G, 128GB storage, triple camera"
product_b = "Budget smartphone with 4G, 64GB storage, dual camera"

comparison = compare_content(
    product_a, 
    product_b,
    ["features", "target market", "value proposition"]
)
print(comparison)

Bulk Analysis Pipeline

class ContentAnalysisPipeline:
    """Runs several analyses (sentiment, topics, summary) over a batch of
    content items associated with one datasource.

    Note: analyze_sentiment and extract_topics are placeholders in this
    example and return None until implemented.
    """

    def __init__(self, datasource_id: str):
        # Datasource the pipeline operates against.
        # NOTE(review): not yet used by any method below — presumably the
        # placeholder implementations will query it; confirm when filling
        # them in.
        self.datasource_id = datasource_id

    @staticmethod
    def _preview(content: str, limit: int = 100) -> str:
        """Return the first *limit* characters of *content*, appending an
        ellipsis only when the text was actually truncated.

        (Fixes the original behavior of always appending "...", which made
        short content look truncated when it was not.)
        """
        return content if len(content) <= limit else content[:limit] + "..."

    def analyze_batch(self, contents: list) -> list:
        """Run all analyses on each item and return one result dict per item."""
        results = []

        for content in contents:
            # Run multiple analyses per item.
            sentiment = self.analyze_sentiment(content)
            topics = self.extract_topics(content)
            summary = self.generate_summary(content)

            results.append({
                "content": self._preview(content),
                "sentiment": sentiment,
                "topics": topics,
                "summary": summary
            })

        return results

    def analyze_sentiment(self, content: str) -> str:
        # Placeholder — returns None until implemented.
        pass

    def extract_topics(self, content: str) -> list:
        # Placeholder — returns None until implemented.
        pass

    def generate_summary(self, content: str) -> str:
        """Ask the model for a one-sentence summary, capped at 50 tokens."""
        response = client.rag.chat(
            messages=[
                {"role": "user", "content": f"Summarize in one sentence: {content}"}
            ],
            execution_control={"max_tokens": 50}
        )
        return response.choices[0].message.content

# Use the pipeline
# Use the pipeline: run all analyses over the earlier `articles` batch.
pipeline = ContentAnalysisPipeline(content_db.id)
results = pipeline.analyze_batch(articles)

Best Practices

  1. Structured Prompts: Use clear, structured prompts for consistent results
  2. JSON Responses: Request JSON format for easier parsing
  3. Batch Processing: Process multiple items together for efficiency
  4. Confidence Tracking: Monitor confidence scores for quality control
  5. Metadata Usage: Leverage metadata for filtering and organization

Next Steps