Sentiment Analysis

Sentiment analysis determines the emotional tone of text, typically classifying it as positive, negative, or neutral. It can be applied at different levels of granularity, from a whole document down to the individual aspects it mentions (aspect-based analysis).

Rule-Based Sentiment

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

class RuleBasedSentiment:
    """Lexicon-based sentiment scoring that wraps TextBlob and VADER."""

    def __init__(self):
        # Build the VADER analyzer once; every vader_sentiment() call reuses it.
        self.vader = SentimentIntensityAnalyzer()

    def textblob_sentiment(self, text: str) -> dict:
        """Score *text* with TextBlob.

        Returns a dict with "polarity", "subjectivity" and a "label" that is
        "positive" for polarity > 0, "negative" for polarity < 0 and
        "neutral" when the polarity is exactly 0.
        """
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity

        if polarity > 0:
            label = "positive"
        elif polarity < 0:
            label = "negative"
        else:
            label = "neutral"

        return {
            "polarity": polarity,
            "subjectivity": sentiment.subjectivity,
            "label": label,
        }

    def vader_sentiment(self, text: str) -> dict:
        """Score *text* with VADER.

        Returns the "pos"/"neg"/"neu"/"compound" scores under the keys
        "positive", "negative", "neutral" and "compound", plus a "label"
        derived from the compound score with cut-offs at +0.05 and -0.05.
        """
        raw = self.vader.polarity_scores(text)
        compound = raw["compound"]

        if compound >= 0.05:
            label = "positive"
        elif compound <= -0.05:
            label = "negative"
        else:
            label = "neutral"

        return {
            "positive": raw["pos"],
            "negative": raw["neg"],
            "neutral": raw["neu"],
            "compound": compound,
            "label": label,
        }

    def ensemble(self, text: str) -> dict:
        """Run both analyzers and combine their labels.

        The combined label is the shared label when TextBlob and VADER agree
        and "neutral" when they disagree. "confidence" is the absolute value
        of VADER's compound score in either case.
        """
        textblob_result = self.textblob_sentiment(text)
        vader_result = self.vader_sentiment(text)

        if textblob_result["label"] == vader_result["label"]:
            combined_label = textblob_result["label"]
        else:
            combined_label = "neutral"

        return {
            "textblob": textblob_result,
            "vader": vader_result,
            "ensemble_label": combined_label,
            "confidence": abs(vader_result["compound"]),
        }

# Usage: score one review with both rule-based analyzers and combine the labels.
analyzer = RuleBasedSentiment()
result = analyzer.ensemble("This product is amazing! Best purchase ever.")
# result carries four keys: "textblob", "vader", "ensemble_label", "confidence".
# Illustrative excerpt: {..., "ensemble_label": "positive", "confidence": 0.85}
# NOTE(review): the exact scores depend on the installed TextBlob/VADER
# versions and were not re-verified here.

ML-Based Sentiment Classifier

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
import numpy as np

class MLSentimentClassifier:
    """TF-IDF + logistic-regression text classifier for sentiment labels.

    The model lives in ``self.pipeline`` with two named steps: ``'tfidf'``
    (vectorizer) and ``'classifier'`` (logistic regression).
    """

    def __init__(self):
        self.pipeline = Pipeline([
            ('tfidf', TfidfVectorizer(
                max_features=50000,
                ngram_range=(1, 3),  # unigrams, bigrams and trigrams
                sublinear_tf=True
            )),
            ('classifier', LogisticRegression(
                C=1.0,
                max_iter=1000,
                class_weight='balanced'
            ))
        ])

    def train(self, texts: list, labels: list):
        """Fit the pipeline on all data and report 5-fold cross-validated F1.

        Args:
            texts: Training documents.
            labels: One label per document, aligned with *texts*.

        Returns:
            Dict with "cv_f1_mean" and "cv_f1_std" (weighted F1 over the folds).
        """
        self.pipeline.fit(texts, labels)
        # The scores come from a separate 5-fold run over the same data; they
        # estimate quality and are independent of the fit performed above.
        cv_scores = cross_val_score(self.pipeline, texts, labels, cv=5, scoring='f1_weighted')
        return {
            "cv_f1_mean": cv_scores.mean(),
            "cv_f1_std": cv_scores.std()
        }

    def predict(self, texts: list) -> list:
        """Return the predicted label for each document in *texts*."""
        return self.pipeline.predict(texts)

    def predict_proba(self, texts: list) -> np.ndarray:
        """Return the per-class probabilities for each document in *texts*."""
        return self.pipeline.predict_proba(texts)

    def get_top_features(self, label: str, n: int = 10) -> list:
        """Return the *n* features that push hardest towards *label*.

        Args:
            label: One of the labels the classifier was trained on.
            n: Maximum number of features to return; values <= 0 yield ``[]``.

        Returns:
            List of ``(feature_name, weight)`` tuples, strongest first.

        Raises:
            ValueError: If *label* is not among the trained classes.
        """
        if n <= 0:
            # Guard explicitly: a slice of [-0:] would return every feature.
            return []

        feature_names = self.pipeline.named_steps['tfidf'].get_feature_names_out()
        classifier = self.pipeline.named_steps['classifier']
        classes = list(classifier.classes_)
        if label not in classes:
            raise ValueError(f"Unknown label {label!r}; trained classes are {classes}")

        weights = classifier.coef_
        if weights.shape[0] == 1:
            # Binary model: the single coefficient row scores classes[1], so
            # the other class is described by the negated weights.
            coef = weights[0] if label == classes[1] else -weights[0]
        else:
            # Multiclass model: one coefficient row per class, in classes_ order.
            coef = weights[classes.index(label)]

        top_indices = np.argsort(coef)[-n:]
        return [(feature_names[i], coef[i]) for i in reversed(top_indices)]

# Usage: train on a toy dataset.
classifier = MLSentimentClassifier()
texts = ["Great product!", "Terrible service", "Amazing experience"]
labels = ["positive", "negative", "positive"]
# The three examples are repeated 1000 times so the 5-fold cross-validation in
# train() has samples to split. Every fold then contains the same three texts,
# so the returned CV scores (discarded here) say nothing about generalisation.
classifier.train(texts * 1000, labels * 1000)

Transformer-Based Sentiment

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch

class TransformerSentiment:
    """Sentiment classifier built on a Hugging Face sequence-classification model."""

    def __init__(self, model_name: str = "cardiffnlp/twitter-roberta-base-sentiment-latest"):
        # Tokenizer and model are loaded explicitly and then handed to the
        # "sentiment-analysis" pipeline that all analyze* methods go through.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.pipe = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

    def analyze(self, text: str) -> dict:
        """Classify one text.

        Returns a dict with the predicted "label", its "score" rounded to
        four decimals, and the input "text".
        """
        prediction = self.pipe(text)[0]
        label = prediction["label"]
        score = round(prediction["score"], 4)
        return {"label": label, "score": score, "text": text}

    def analyze_batch(self, texts: list) -> list:
        """Classify many texts (pipeline batch size 32).

        Returns one dict per text, shaped like the result of analyze().
        """
        predictions = self.pipe(texts, batch_size=32)
        analyzed = []
        for prediction, original in zip(predictions, texts):
            analyzed.append({
                "label": prediction["label"],
                "score": round(prediction["score"], 4),
                "text": original,
            })
        return analyzed

    def analyze_with_confidence(self, text: str, threshold: float = 0.7) -> dict:
        """Classify one text and flag whether the score reaches *threshold*.

        Returns the analyze() result extended with "is_confident"
        (score >= threshold) and "needs_review" (score < threshold).
        """
        outcome = dict(self.analyze(text))
        score = outcome["score"]
        outcome["is_confident"] = score >= threshold
        outcome["needs_review"] = score < threshold
        return outcome

# Usage: classify a single review with the default model.
sentiment = TransformerSentiment()
result = sentiment.analyze("I absolutely love this restaurant! The food was delicious.")
# result has the keys "label", "score" (rounded to 4 decimals) and "text".
# Illustrative output: {"label": "positive", "score": 0.98, "text": "..."}
# NOTE(review): the label strings and the score come from the loaded model;
# the values shown were not re-verified here.

Aspect-Based Sentiment Analysis

class AspectSentimentAnalyzer:
    """Aspect-level sentiment using keyword rules, plus an LLM-backed extractor.

    Args:
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content``
            attribute. It is only used by :meth:`extract_aspects`.
    """

    def __init__(self, llm):
        self.llm = llm
        # Aspect name -> keywords whose presence signals that aspect.
        self.aspects = {
            "product": ["product", "item", "quality", "material"],
            "service": ["service", "staff", "support", "help"],
            "price": ["price", "cost", "value", "expensive", "cheap"],
            "delivery": ["delivery", "shipping", "arrived", "fast"]
        }

    def extract_aspects(self, text: str) -> list:
        """Ask the LLM for aspects and sentiments in *text*.

        Returns:
            List of dicts parsed from the model's JSON answer; ``[]`` when the
            answer contains no parseable JSON.
        """
        prompt = f"""Extract sentiment aspects from this text:
        Text: {text}
        Return JSON with aspects and their sentiments."""
        response = self.llm.invoke(prompt).content
        return self._parse_aspects(response)

    def _parse_aspects(self, response: str) -> list:
        """Turn the LLM's reply into a list of aspect dicts.

        Accepts a bare JSON array of objects, an object with an "aspects"
        key, or a flat ``{aspect: sentiment}`` object. Surrounding prose or
        code fences are ignored. Anything unparseable yields ``[]``.
        """
        import json
        import re

        if not isinstance(response, str):
            return []

        # Grab the outermost JSON-looking span so fences/prose do not matter.
        found = re.search(r"\{.*\}|\[.*\]", response, re.DOTALL)
        if found is None:
            return []
        try:
            payload = json.loads(found.group(0))
        except json.JSONDecodeError:
            return []

        if isinstance(payload, dict) and "aspects" in payload:
            payload = payload["aspects"]
        if isinstance(payload, dict):
            return [
                {"aspect": name, "sentiment": value}
                for name, value in payload.items()
            ]
        if isinstance(payload, list):
            return [item for item in payload if isinstance(item, dict)]
        return []

    def rule_based_aspects(self, text: str) -> dict:
        """Return ``{aspect: sentiment_dict}`` for every aspect whose keywords
        occur (as substrings, case-insensitively) in *text*."""
        text_lower = text.lower()
        results = {}
        for aspect, keywords in self.aspects.items():
            if any(kw in text_lower for kw in keywords):
                results[aspect] = self._get_aspect_sentiment(text, keywords)
        return results

    def _get_aspect_sentiment(self, text: str, keywords: list) -> dict:
        """Average TextBlob polarity over the sentences mentioning *keywords*.

        Sentences are obtained by splitting on '.'. The result has the keys
        "sentiment" (cut-offs at +/-0.1), "confidence" (absolute mean
        polarity) and "relevant_text" (the matching sentences).
        """
        sentences = text.split('.')
        relevant_sentences = [
            s for s in sentences
            if any(kw in s.lower() for kw in keywords)
        ]
        if not relevant_sentences:
            # Same shape as the main return so callers can rely on the keys.
            return {"sentiment": "neutral", "confidence": 0.5, "relevant_text": []}

        from textblob import TextBlob
        sentiments = [TextBlob(s).sentiment.polarity for s in relevant_sentences]
        avg_sentiment = sum(sentiments) / len(sentiments)
        return {
            "sentiment": "positive" if avg_sentiment > 0.1 else
                        "negative" if avg_sentiment < -0.1 else "neutral",
            "confidence": abs(avg_sentiment),
            "relevant_text": relevant_sentences
        }

    def full_analysis(self, text: str) -> dict:
        """Combine overall TextBlob sentiment with the rule-based aspects.

        Returns:
            Dict with "text", "overall_sentiment" (polarity and
            subjectivity), "aspects" and a one-line "summary".
        """
        # Imported locally, as in _get_aspect_sentiment, so the class does not
        # depend on a module-level import.
        from textblob import TextBlob

        overall = TextBlob(text).sentiment
        aspects = self.rule_based_aspects(text)
        return {
            "text": text,
            "overall_sentiment": {
                "polarity": overall.polarity,
                "subjectivity": overall.subjectivity
            },
            "aspects": aspects,
            "summary": self._generate_summary(aspects)
        }

    def _generate_summary(self, aspects: dict) -> str:
        """List the positive and negative aspect names in one sentence each."""
        if not aspects:
            return "No specific aspects detected."
        positive = [a for a, v in aspects.items() if v["sentiment"] == "positive"]
        negative = [a for a, v in aspects.items() if v["sentiment"] == "negative"]
        return f"Positive: {', '.join(positive) or 'None'}. Negative: {', '.join(negative) or 'None'}."

# Usage: rule-based aspect analysis of a single review.
# NOTE(review): `llm` is not defined in this snippet; it presumably comes from
# the reader's own setup. full_analysis() itself never calls it.
analyzer = AspectSentimentAnalyzer(llm)
result = analyzer.full_analysis("Great quality product but delivery was slow and expensive.")
# result has the keys "text", "overall_sentiment", "aspects" and "summary".
# Each value in result["aspects"] is a dict with "sentiment", "confidence" and
# "relevant_text", not a bare string. The input is one sentence, so the
# "product", "price" and "delivery" aspects are all scored on that same
# sentence and receive the same sentiment.

Key Takeaways

Rule-based methods are fast and interpretable but limited
ML classifiers learn patterns from labeled data effectively
Transformers provide state-of-the-art accuracy for most tasks
Aspect-based analysis reveals sentiment toward specific features
Ensemble methods combine approaches for robust predictions

Sentiment Analysis

Sentiment Analysis

Rule-Based Sentiment

ML-Based Sentiment Classifier

Transformer-Based Sentiment

Aspect-Based Sentiment Analysis

Key Takeaways

Premium Content

Need Expert Generative AI Help?