feat: fix analysis; update prompt

Gowtham1729 committed on Dec 18, 2023
1 parent: acd9555
commit e4afd3b
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 163 deletions.
diff --git a/applications/data_analyzer/analyzer.py b/applications/data_analyzer/analyzer.py
@@ -37,7 +37,7 @@
 PROMPT = """"
 When you receive news data related to a stock, formatted as {"category": "str", "symbol": "str", "src": "str", "src_url": "str", "headline": "str", "summary": "str"}, analyze this information and produce a single JSON response. The output should strictly follow the structure {"sentiment_score": "float", "need_attention": "bool", "reason": "str"} and must adhere to these guidelines:
 
-Sentiment Score: The sentiment score should range between -1 and 1, where -1 is highly negative, 0 is neutral, and 1 is highly positive. Assign extreme values (+1 or -1) only for news that is defined as 'highly impactful', based on its potential influence on the stock's performance and public sentiment. The stock here is indicated by the "symbol" field.
+Sentiment Score: The sentiment score should range between -10 and 10, where -10 is highly negative, 0 is neutral, and 10 is highly positive. Assign extreme values (+10 or -10) only for news that is defined as 'highly impactful', based on its potential influence on the stock's performance and public sentiment. The stock here is indicated by the "symbol" field.
 
 Need Attention: The "need_attention" field should be a boolean (true or false). Set it to true if the news is important and necessary for someone owning that stock in their portfolio to read. This field indicates that the news item is critical for stockholders to understand potential changes in stock value or company status.
 
@@ -144,47 +144,6 @@ def fetch_news(self, id_: int) -> News:
             reason=item[11],
         )
 
-    def analyze_symbol(self, symbol: str, news: List[News]) -> Analysis:
-        return Analysis(
-            category=news[0].category,
-            symbol=symbol,
-            date=datetime.now().date().isoformat(),
-            average_sentiment=sum(
-                [item.sentiment for item in news if item.sentiment != 0]
-            )
-            / len(news),
-            total_news=len(news),
-            positive_news=len([item for item in news if item.sentiment > 0]),
-            negative_news=len([item for item in news if item.sentiment < 0]),
-            neutral_news=len([item for item in news if item.sentiment == 0]),
-            need_attention=any([item for item in news if item.need_attention]),
-        )
-
-    def insert_analysis(self, analysis: List[Analysis]):
-        self.cursor.executemany(
-            """
-            INSERT INTO news_analysis
-            (category, symbol, date, average_sentiment, total_news, positive_news, negative_news, neutral_news, need_attention)
-            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-            ON CONFLICT (symbol, date) DO UPDATE SET average_sentiment = EXCLUDED.average_sentiment, total_news = EXCLUDED.total_news, positive_news = EXCLUDED.positive_news, negative_news = EXCLUDED.negative_news, neutral_news = EXCLUDED.neutral_news, need_attention = EXCLUDED.need_attention
-            """,
-            [
-                (
-                    item.category,
-                    item.symbol,
-                    item.date,
-                    item.average_sentiment,
-                    item.total_news,
-                    item.positive_news,
-                    item.negative_news,
-                    item.neutral_news,
-                    item.need_attention,
-                )
-                for item in analysis
-            ],
-        )
-        self.connection.commit()
-
     def update_ai_news_analysis(self, news: List[News]):
         self.cursor.executemany(
             """
@@ -261,7 +220,7 @@ def update_daily_analysis(self, news: News, date: str):
                 average_sentiment = 
                 (
                     SELECT 
-                        AVG(sentiment) 
+                        COALESCE(AVG(sentiment) FILTER ( WHERE sentiment != 0 ), 0) AS avg_sentiment
                     FROM 
                         news_news 
                     WHERE 
@@ -314,35 +273,6 @@ def analyze_news(self, ch, method, properties, body: str):
         logger.info(f"Analysis Updated!")
         ch.basic_ack(delivery_tag=method.delivery_tag)
 
-    def analyze(self):
-        logger.info(f"Fetching all News...")
-        all_news = self.get_today_news()
-        logger.info(f"News Count: {len(all_news)}")
-        news = {}
-        for item in all_news:
-            if item.symbol not in news:
-                news[item.symbol] = []
-            news[item.symbol].append(item)
-
-        logger.info(f"News AI Analysis...")
-        for symbol, news_list in news.items():
-            for item in news_list:
-                sentiment, need_attention, reason = self.ai_analysis(item)
-                item.sentiment = sentiment
-                item.need_attention = need_attention
-                item.reason = reason
-
-        logger.info(f"Updating AI Analysis...")
-        self.update_ai_news_analysis(all_news)
-
-        logger.info(f"Analyzing Each Symbol...")
-        analysis = [self.analyze_symbol(symbol, news) for symbol, news in news.items()]
-        logger.info(f"Analysis: {analysis}")
-
-        logger.info(f"Inserting Analysis...")
-        self.insert_analysis(analysis)
-        logger.info(f"Analysis Finished!")
-
 
 if __name__ == "__main__":
     analyzer = Analyzer()

diff --git a/applications/data_analyzer/tests/analyzer_test.py b/applications/data_analyzer/tests/analyzer_test.py
@@ -1,91 +0,0 @@
-from datetime import datetime
-from unittest.mock import patch
-
-import pytest
-from data_analyzer.analyzer import Analyzer
-from data_analyzer.utils.models import Analysis, News
-
-APPLE_NEWS_1 = News(
-    id=1,
-    category="equity",
-    symbol="AAPL",
-    src="example",
-    src_url="https://www.example.com",
-    img_src_url="https://example.com/img.png",
-    headline="Apple launches new iPhone 12",
-    summary="Apple launches new iPhone 12",
-    publish_time="2020-05-17T10:00:00.000Z",
-    sentiment=1,
-    need_attention=False,
-    reason=None,
-)
-
-APPLE_NEWS_2 = News(
-    id=2,
-    category="equity",
-    symbol="AAPL",
-    src="example",
-    src_url="https://www.example.com",
-    img_src_url="https://example.com/img.png",
-    headline="Apple launches new iPad 5",
-    summary="Apple launches new iPad 5",
-    publish_time="2020-05-17T10:00:00.000Z",
-    sentiment=1,
-    need_attention=True,
-    reason=None,
-)
-
-
-@pytest.fixture()
-def analyzer():
-    with patch("psycopg.connect") as mock_connect:
-        mock_connect.return_value.cursor.return_value.fetchone.return_value = [
-            1,
-            "equity",
-            "AAPL",
-            "example",
-            "https://www.example.com",
-            "https://example.com/img.png",
-            "Apple launches new iPhone 12",
-            "Apple launches new iPhone 12",
-            "2020-05-17T10:00:00.000Z",
-            1,
-            False,
-            None,
-        ]
-        yield Analyzer()
-
-
-class TestAnalyzer:
-    @pytest.mark.parametrize(
-        "symbol, news, expected",
-        [
-            (
-                "AAPL",
-                [
-                    APPLE_NEWS_1,
-                    APPLE_NEWS_2,
-                ],
-                Analysis(
-                    category="equity",
-                    symbol="AAPL",
-                    date=datetime.now().date().isoformat(),
-                    average_sentiment=1,
-                    total_news=2,
-                    positive_news=2,
-                    negative_news=0,
-                    neutral_news=0,
-                    need_attention=True,
-                ),
-            ),
-        ],
-    )
-    def test__analyze_symbol(self, symbol, news, expected):
-        analyzer = Analyzer()
-        actual = analyzer.analyze_symbol(symbol, news)
-        assert actual.__dict__ == expected.__dict__
-
-    def test__fetch_news(self, analyzer):
-        expected_news = APPLE_NEWS_1
-        result = analyzer.fetch_news(1)
-        assert result.__dict__ == expected_news.__dict__

diff --git a/applications/data_analyzer/update_analysis.sql b/applications/data_analyzer/update_analysis.sql
@@ -0,0 +1,28 @@
+WITH news_stats AS (
+    SELECT
+        COALESCE(AVG(sentiment) FILTER ( WHERE sentiment != 0 ), 0) AS avg_sentiment,
+        COUNT(*) AS total_news_count,
+        COUNT(*) FILTER (WHERE sentiment > 0) AS positive_news_count,
+        COUNT(*) FILTER (WHERE sentiment < 0) AS negative_news_count,
+        COUNT(*) FILTER (WHERE sentiment = 0) AS neutral_news_count,
+        BOOL_OR(need_attention) AS any_need_attention
+    FROM
+        news_news
+    WHERE
+        DATE(publish_time) = '2023-12-17'
+        AND symbol = 'NFLX'
+)
+UPDATE
+    news_analysis
+SET
+    average_sentiment = news_stats.avg_sentiment,
+    total_news = news_stats.total_news_count,
+    positive_news = news_stats.positive_news_count,
+    negative_news = news_stats.negative_news_count,
+    neutral_news = news_stats.neutral_news_count,
+    need_attention = news_stats.any_need_attention
+FROM
+    news_stats
+WHERE
+    news_analysis.symbol = 'NFLX'
+    AND news_analysis.date = '2023-12-17';