Sentiment Analysis
Sentiment analysis determines the emotional tone of text, classifying it as positive, negative, or neutral. It can be applied at different levels of granularity, from a whole document down to individual aspects of a product or service (aspect-based analysis).
Rule-Based Sentiment
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
class RuleBasedSentiment:
    """Lexicon-based sentiment scoring that combines TextBlob and VADER.

    Each analyzer is available on its own (``textblob_sentiment`` and
    ``vader_sentiment``); ``ensemble`` merges the two labels into one verdict.
    """

    # VADER compound scores at or beyond +/- this value get a polar label;
    # anything strictly in between is labelled "neutral".
    VADER_THRESHOLD = 0.05

    def __init__(self):
        self.vader = SentimentIntensityAnalyzer()

    def textblob_sentiment(self, text: str) -> dict:
        """Score ``text`` with TextBlob.

        Returns:
            A dict with TextBlob's ``polarity`` and ``subjectivity`` plus a
            ``label``: "positive" for polarity > 0, "negative" for
            polarity < 0, otherwise "neutral".
        """
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        if polarity > 0:
            label = "positive"
        elif polarity < 0:
            label = "negative"
        else:
            label = "neutral"
        return {
            "polarity": polarity,
            "subjectivity": sentiment.subjectivity,
            "label": label,
        }

    def vader_sentiment(self, text: str) -> dict:
        """Score ``text`` with VADER.

        Returns:
            A dict with VADER's ``pos``/``neg``/``neu``/``compound`` scores
            (under the keys "positive", "negative", "neutral", "compound")
            plus a ``label`` derived from the compound score: "positive" when
            it is >= 0.05, "negative" when it is <= -0.05, else "neutral".
        """
        scores = self.vader.polarity_scores(text)
        compound = scores["compound"]
        if compound >= self.VADER_THRESHOLD:
            label = "positive"
        elif compound <= -self.VADER_THRESHOLD:
            label = "negative"
        else:
            label = "neutral"
        return {
            "positive": scores["pos"],
            "negative": scores["neg"],
            "neutral": scores["neu"],
            "compound": compound,
            "label": label,
        }

    def ensemble(self, text: str) -> dict:
        """Run both analyzers on ``text`` and combine their labels.

        Returns:
            A dict with the two per-analyzer results ("textblob", "vader"),
            an "ensemble_label", a "confidence", and an "agreement" flag.
            See ``_combine`` for how the last three are derived.
        """
        return self._combine(
            self.textblob_sentiment(text),
            self.vader_sentiment(text),
        )

    @staticmethod
    def _combine(tb: dict, vd: dict) -> dict:
        """Merge a TextBlob result and a VADER result into one verdict.

        The shared label is used when both analyzers agree; otherwise the
        verdict falls back to "neutral". "confidence" is the magnitude of the
        VADER compound score alone, so on a disagreement it does NOT describe
        the "neutral" fallback -- check "agreement" to tell a genuine neutral
        from a fallback.
        """
        agreement = tb["label"] == vd["label"]
        return {
            "textblob": tb,
            "vader": vd,
            "ensemble_label": tb["label"] if agreement else "neutral",
            "confidence": abs(vd["compound"]),
            "agreement": agreement,
        }
# Usage: score one review with both lexicon-based analyzers and combine them.
analyzer = RuleBasedSentiment()
result = analyzer.ensemble("This product is amazing! Best purchase ever.")
# Abbreviated, illustrative output. The returned dict also carries the full
# per-analyzer results under "textblob" and "vader"; the confidence value shown
# is an example, not a value verified against the lexicons.
# {"ensemble_label": "positive", "confidence": 0.85}
ML-Based Sentiment Classifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
import numpy as np
class MLSentimentClassifier:
    """Sentiment classifier built from TF-IDF n-gram features and logistic regression."""

    def __init__(self):
        self.pipeline = Pipeline([
            ('tfidf', TfidfVectorizer(
                max_features=50000,
                ngram_range=(1, 3),
                sublinear_tf=True,
            )),
            ('classifier', LogisticRegression(
                C=1.0,
                max_iter=1000,
                class_weight='balanced',
            )),
        ])

    def train(self, texts: list, labels: list) -> dict:
        """Fit the pipeline on all data and report cross-validated weighted F1.

        Args:
            texts: Training documents.
            labels: One label per document, aligned with ``texts``.

        Returns:
            A dict with the mean ("cv_f1_mean") and standard deviation
            ("cv_f1_std") of the weighted F1 over 5 cross-validation folds.
        """
        self.pipeline.fit(texts, labels)
        # cross_val_score fits clones of the pipeline for each fold, so the
        # model fitted on the full data above is left untouched.
        cv_scores = cross_val_score(
            self.pipeline, texts, labels, cv=5, scoring='f1_weighted'
        )
        return {
            "cv_f1_mean": float(cv_scores.mean()),
            "cv_f1_std": float(cv_scores.std()),
        }

    def predict(self, texts: list) -> np.ndarray:
        """Return the pipeline's predicted label for each text."""
        return self.pipeline.predict(texts)

    def predict_proba(self, texts: list) -> np.ndarray:
        """Return the pipeline's per-class probabilities for each text."""
        return self.pipeline.predict_proba(texts)

    def get_top_features(self, label: str, n: int = 10) -> list:
        """Return the ``n`` features that push predictions most towards ``label``.

        Args:
            label: One of the class labels the classifier was trained on.
            n: Maximum number of features to return.

        Returns:
            A list of ``(feature_name, weight)`` tuples, strongest first.
            Empty when ``n`` is not positive.

        Raises:
            ValueError: If ``label`` is not one of the trained classes.
        """
        # A slice of [-0:] would return every feature, so handle n <= 0 up front.
        if n <= 0:
            return []

        feature_names = self.pipeline.named_steps['tfidf'].get_feature_names_out()
        classifier = self.pipeline.named_steps['classifier']
        classes = list(classifier.classes_)
        if label not in classes:
            known = [str(c) for c in classes]
            raise ValueError(
                f"Unknown label {label!r}; the model was trained on {known}"
            )
        class_index = classes.index(label)

        coef = classifier.coef_
        if coef.shape[0] == 1:
            # Binary model: the single coefficient row scores classes_[1];
            # its negation ranks the features for classes_[0].
            weights = coef[0] if class_index == 1 else -coef[0]
        else:
            # Multiclass model: one coefficient row per class, in classes_ order.
            weights = coef[class_index]

        top_indices = np.argsort(weights)[-n:][::-1]
        return [(str(feature_names[i]), float(weights[i])) for i in top_indices]
# Usage: train the TF-IDF + logistic-regression classifier on a toy dataset.
classifier = MLSentimentClassifier()
texts = ["Great product!", "Terrible service", "Amazing experience"]
labels = ["positive", "negative", "positive"]
# NOTE: the three examples are repeated 1000 times so that the 5-fold
# cross-validation inside train() has enough samples. Identical texts therefore
# appear in both the training and validation folds, so the CV F1 that train()
# returns (and that is discarded here) is optimistic for this toy data.
classifier.train(texts * 1000, labels * 1000)
Transformer-Based Sentiment
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
class TransformerSentiment:
    """Sentiment classification through a Hugging Face ``sentiment-analysis`` pipeline."""

    def __init__(self, model_name: str = "cardiffnlp/twitter-roberta-base-sentiment-latest"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.pipe = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

    @staticmethod
    def _format(prediction: dict, text: str) -> dict:
        """Turn one raw pipeline prediction into the dict returned to callers."""
        return {
            "label": prediction["label"],
            "score": round(prediction["score"], 4),
            "text": text,
        }

    def analyze(self, text: str) -> dict:
        """Classify a single text.

        Returns:
            A dict with the predicted "label", its "score" rounded to four
            decimals, and the input "text".
        """
        # truncation=True asks the tokenizer to cut inputs down to the model's
        # maximum sequence length instead of passing them through at full length.
        prediction = self.pipe(text, truncation=True)[0]
        return self._format(prediction, text)

    def analyze_batch(self, texts: list) -> list:
        """Classify several texts, sending them to the pipeline in batches of 32.

        Returns:
            One result dict per input text (same shape as ``analyze``), in
            input order. An empty input yields an empty list without
            invoking the pipeline.
        """
        if not texts:
            return []
        predictions = self.pipe(texts, batch_size=32, truncation=True)
        return [
            self._format(prediction, text)
            for prediction, text in zip(predictions, texts)
        ]

    def analyze_with_confidence(self, text: str, threshold: float = 0.7) -> dict:
        """Classify ``text`` and flag whether the score clears ``threshold``.

        The comparison uses the rounded score that is reported to the caller.

        Returns:
            The ``analyze`` result plus "is_confident" (score >= threshold)
            and "needs_review" (score < threshold).
        """
        result = self.analyze(text)
        score = result["score"]
        return {
            **result,
            "is_confident": score >= threshold,
            "needs_review": score < threshold,
        }
# Usage: classify a single review with the default model.
# Constructing TransformerSentiment loads the tokenizer and model through
# from_pretrained() before the first prediction can be made.
sentiment = TransformerSentiment()
result = sentiment.analyze("I absolutely love this restaurant! The food was delicious.")
# Illustrative output; "text" echoes the input and the score is rounded to 4 decimals.
# {"label": "positive", "score": 0.98, "text": "..."}
Aspect-Based Sentiment Analysis
class AspectSentimentAnalyzer:
    """Aspect-level sentiment, via an LLM or via keyword rules plus TextBlob.

    ``llm`` is only used by ``extract_aspects``; it must provide
    ``invoke(prompt)`` returning an object with a ``content`` attribute.
    The rule-based path (``rule_based_aspects`` / ``full_analysis``) never
    calls it.
    """

    def __init__(self, llm):
        self.llm = llm
        # Aspect name -> keywords whose presence signals that aspect.
        # Matching is by substring, so e.g. "fast" also matches "breakfast".
        self.aspects = {
            "product": ["product", "item", "quality", "material"],
            "service": ["service", "staff", "support", "help"],
            "price": ["price", "cost", "value", "expensive", "cheap"],
            "delivery": ["delivery", "shipping", "arrived", "fast"]
        }

    def extract_aspects(self, text: str) -> list:
        """Ask the LLM for the aspects in ``text`` and parse its JSON reply.

        Returns:
            The parsed aspects as a list (see ``_parse_aspects``); empty when
            the reply contains no parseable JSON.
        """
        prompt = (
            "Extract sentiment aspects from this text:\n"
            f"Text: {text}\n"
            "Return JSON with aspects and their sentiments."
        )
        response = self.llm.invoke(prompt).content
        return self._parse_aspects(response)

    @staticmethod
    def _parse_aspects(response: str) -> list:
        """Best-effort extraction of an aspect list from an LLM reply.

        The first JSON object or array found in ``response`` is parsed, so
        surrounding prose or code fences are tolerated. Accepted shapes:

        * a JSON array -> returned as is;
        * an object with an "aspects" array -> that array;
        * any other object (or an "aspects" object) mapping aspect to
          sentiment -> ``[{"aspect": name, "sentiment": value}, ...]``.

        Returns an empty list when nothing parseable is found.
        """
        import json
        import re

        if not isinstance(response, str):
            return []
        found = re.search(r"\{.*\}|\[.*\]", response, re.DOTALL)
        if found is None:
            return []
        try:
            data = json.loads(found.group(0))
        except json.JSONDecodeError:
            # LLM replies are free text; malformed JSON is an expected outcome.
            return []

        if isinstance(data, dict):
            data = data.get("aspects", data)
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            return [
                {"aspect": name, "sentiment": value}
                for name, value in data.items()
            ]
        return []

    def rule_based_aspects(self, text: str) -> dict:
        """Score every configured aspect whose keywords occur in ``text``.

        Returns:
            A dict mapping each detected aspect name to the result of
            ``_get_aspect_sentiment``; aspects without a keyword hit are
            omitted.
        """
        text_lower = text.lower()
        results = {}
        for aspect, keywords in self.aspects.items():
            if any(kw in text_lower for kw in keywords):
                results[aspect] = self._get_aspect_sentiment(text, keywords)
        return results

    @staticmethod
    def _split_sentences(text: str) -> list:
        """Split ``text`` after ".", "!" or "?" when followed by whitespace.

        Terminators stay attached to their sentence, decimals such as "3.50"
        are not split, and empty fragments are dropped.
        """
        import re

        pieces = re.split(r"(?<=[.!?])\s+", text)
        return [piece.strip() for piece in pieces if piece.strip()]

    def _get_aspect_sentiment(self, text: str, keywords: list) -> dict:
        """Average the TextBlob polarity of the sentences mentioning ``keywords``.

        Returns:
            A dict with "sentiment" ("positive" above 0.1, "negative" below
            -0.1, else "neutral"), "confidence" (absolute average polarity)
            and "relevant_text" (the sentences that were scored). When no
            sentence mentions a keyword the result is neutral with
            confidence 0.5 and no relevant text.
        """
        from textblob import TextBlob

        relevant_sentences = [
            sentence
            for sentence in self._split_sentences(text)
            if any(kw in sentence.lower() for kw in keywords)
        ]
        if not relevant_sentences:
            return {"sentiment": "neutral", "confidence": 0.5, "relevant_text": []}

        polarities = [TextBlob(s).sentiment.polarity for s in relevant_sentences]
        avg_sentiment = sum(polarities) / len(polarities)
        if avg_sentiment > 0.1:
            sentiment = "positive"
        elif avg_sentiment < -0.1:
            sentiment = "negative"
        else:
            sentiment = "neutral"
        return {
            "sentiment": sentiment,
            "confidence": abs(avg_sentiment),
            "relevant_text": relevant_sentences
        }

    def full_analysis(self, text: str) -> dict:
        """Combine overall TextBlob sentiment with the rule-based aspect scores.

        Returns:
            A dict with the input "text", the "overall_sentiment" (polarity
            and subjectivity of the whole text), the per-aspect results under
            "aspects", and a one-line "summary".
        """
        # Imported locally, like in _get_aspect_sentiment, so this class does
        # not depend on a module-level import made elsewhere in the file.
        from textblob import TextBlob

        overall = TextBlob(text).sentiment
        aspects = self.rule_based_aspects(text)
        return {
            "text": text,
            "overall_sentiment": {
                "polarity": overall.polarity,
                "subjectivity": overall.subjectivity
            },
            "aspects": aspects,
            "summary": self._generate_summary(aspects)
        }

    def _generate_summary(self, aspects: dict) -> str:
        """List the positive and the negative aspects in one sentence each.

        Neutral aspects are not mentioned; an empty ``aspects`` dict yields
        a fixed "nothing detected" message.
        """
        if not aspects:
            return "No specific aspects detected."
        positive = [a for a, v in aspects.items() if v["sentiment"] == "positive"]
        negative = [a for a, v in aspects.items() if v["sentiment"] == "negative"]
        return f"Positive: {', '.join(positive) or 'None'}. Negative: {', '.join(negative) or 'None'}."
# Usage
# NOTE(review): `llm` is not defined in this snippet. It must be created
# beforehand as an object whose invoke(prompt) returns something with a
# `.content` attribute. full_analysis() only uses the rule-based path and
# never calls the LLM.
analyzer = AspectSentimentAnalyzer(llm)
result = analyzer.full_analysis("Great quality product but delivery was slow and expensive.")
# Illustrative, simplified output. The actual dict has "text",
# "overall_sentiment", "aspects" and "summary" keys, and each aspect maps to a
# dict with "sentiment", "confidence" and "relevant_text". Because the input is
# a single sentence, every matched aspect is scored on that same sentence and
# receives the same label; the differing labels below are only illustrative.
# {"aspects": {"product": "positive", "delivery": "negative", "price": "negative"}}
Key Takeaways
- Rule-based methods are fast and interpretable, but limited by the coverage of their lexicons and rules
- ML classifiers learn patterns from labeled data effectively
- Transformers provide state-of-the-art accuracy for most tasks
- Aspect-based analysis reveals sentiment toward specific features
- Ensemble methods combine approaches for robust predictions