Add emotion analysis

f71c9f3b · Omar Hajjaji · f0e249b8 · f71c9f3b · f71c9f3b · f71c9f3b
Commit f71c9f3b authored 1 year ago by Omar Hajjaji
--- a/.env.dist
+++ b/.env.dist
@@ -4,7 +4,7 @@ LOG_FORMAT="json"
 DEFAULT_MAX_LENGTH=50
 DEFAULT_NUM_RETURN_SEQUENCES=1
 DEFAULT_NO_REPEAT_NGRAM_SIZE=2
-ENABLED_MODELS='["gpt2", "nlptownsentiment", "nltksentiment", "textblobsentiment", "mock"]'
+ENABLED_MODELS='["gpt2", "nlptownsentiment", "nltksentiment", "textblobsentiment", "mock","robertaemotion"]'
 DEFAULT_TOP_K=50
 DEFAULT_TOP_P="0.95"
 DEFAULT_TEMPERATURE="0.8"
--- a/docker-compose-local.yml
+++ b/docker-compose-local.yml
-version: '3.3'
+version: "3.3"
 services:
  cwai_api:
    restart: always
@@ -7,7 +7,7 @@ services:
      context: .
      dockerfile: ./Dockerfile
      target: api
-    env_file: 
-      - .env
+    env_file:
+      - .env.dist
    ports:
      - "8000:8000"
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ requests
 uvicorn[standard]
 nltk
 textblob
+scipy
--- a/src/drivers/nlptownsentiment.py
+++ b/src/drivers/nlptownsentiment.py
@@ -5,7 +5,8 @@ import torch
 from utils.logger import log_msg

 _sentiment_model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
-_sentiment_model = AutoModelForSequenceClassification.from_pretrained(_sentiment_model_name)
+_sentiment_model = AutoModelForSequenceClassification.from_pretrained(
+    _sentiment_model_name)
 _sentiment_tokenizer = AutoTokenizer.from_pretrained(_sentiment_model_name)

 emotion_mapping = {
@@ -16,6 +17,7 @@ emotion_mapping = {
    5: 'love'
 }

+
 class NlptownsentimentDriver(ModelDriver):
    def load_model(self):
        log_msg("INFO", "[Nlptownsentiment] loading model...")
@@ -26,4 +28,4 @@ class NlptownsentimentDriver(ModelDriver):
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(probs).item() + 1
        predicted_emotion = emotion_mapping[predicted_class]
-        return { "response": ["The predicted emotion is: {}, score: {}".format(predicted_emotion, predicted_class)], "score": predicted_class }
+        return {"response": ["The predicted emotion is: {}, score: {}".format(predicted_emotion, predicted_class)], "score": predicted_class}
--- a/src/drivers/robertaemotion.py
+++ b/src/drivers/robertaemotion.py
+# A pretrained Hugging Face model is used here because it is more accurate
+# than a basic model I could train from scratch.
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
+import numpy as np
+from scipy.special import softmax
+import csv
+import urllib.request
+from drivers.model_driver import ModelDriver
+from models.prompt import Prompt
+from utils.logger import log_msg
+
+
+# Hugging Face model id; `task` fills in the last segment of the name.
+task = 'emotion'
+MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
+
+# Loaded once at import time; generate_response() below reuses these
+# module-level objects on every call.
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+# Label mapping: position i names the i-th score produced by the model.
+# NOTE(review): the order is hard-coded here — confirm it matches the
+# model's own label mapping.
+labels = ["anger", "joy", "optimism", "sadness"]
+
+
+# Write the model and tokenizer to a local directory named after MODEL,
+# with the intent of not fetching them again on every container run.
+# NOTE(review): presumably this only helps if that directory survives
+# container restarts (image layer or volume) — confirm.
+model.save_pretrained(MODEL)
+tokenizer.save_pretrained(MODEL)
+
+
+class RobertaemotionDriver(ModelDriver):
+    """Driver that scores a prompt against the module-level emotion labels.
+
+    Uses the ``tokenizer``, ``model`` and ``labels`` objects defined at
+    module level in this file.
+    """
+
+    def load_model(self):
+        """Log that the driver is loading; no other work is done here."""
+        log_msg("INFO", "[RobertaEmotion] loading model...")
+
+    def generate_response(self, prompt: Prompt):
+        """Rank every emotion label for ``prompt.message``.
+
+        Returns a dict whose ``"response"`` key holds a one-element list
+        containing a sentence that lists each label with its softmax score
+        (rounded to four decimals), highest score first.
+        """
+        text = prompt.message
+        # Cap the input at 512 tokens: longer sequences exceed what the
+        # RoBERTa position embeddings accept and make the forward pass raise.
+        encoded_input = tokenizer(
+            text, return_tensors='pt', truncation=True, max_length=512)
+        output = model(**encoded_input)
+        scores = softmax(output[0][0].detach().numpy())
+        # argsort is ascending, so reverse it to get the best label first.
+        ranking = np.argsort(scores)[::-1]
+        msg = "".join(
+            "{} {}; ".format(labels[idx], np.round(float(scores[idx]), 4))
+            for idx in ranking
+        )
+        # Report through the project logger instead of print() so the output
+        # follows the configured log format.
+        log_msg("INFO", "[RobertaEmotion] scores: {}".format(msg))
+        return {"response": ["The predicted emotions are: {}".format(msg)]}