Skip to content

Commit

Permalink
feat: fix analysis; update prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
Gowtham1729 committed Dec 18, 2023
1 parent acd9555 commit e4afd3b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 163 deletions.
74 changes: 2 additions & 72 deletions applications/data_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
PROMPT = """"
When you receive news data related to a stock, formatted as {"category": "str", "symbol": "str", "src": "str", "src_url": "str", "headline": "str", "summary": "str"}, analyze this information and produce a single JSON response. The output should strictly follow the structure {"sentiment_score": "float", "need_attention": "bool", "reason": "str"} and must adhere to these guidelines:
Sentiment Score: The sentiment score should range between -1 and 1, where -1 is highly negative, 0 is neutral, and 1 is highly positive. Assign extreme values (+1 or -1) only for news that is defined as 'highly impactful', based on its potential influence on the stock's performance and public sentiment. The stock here is indicated by the "symbol" field.
Sentiment Score: The sentiment score should range between -10 and 10, where -10 is highly negative, 0 is neutral, and 10 is highly positive. Assign extreme values (+10 or -10) only for news that is defined as 'highly impactful', based on its potential influence on the stock's performance and public sentiment. The stock here is indicated by the "symbol" field.
Need Attention: The "need_attention" field should be a boolean (true or false). Set it to true if the news is important and necessary for someone owning that stock in their portfolio to read. This field indicates that the news item is critical for stockholders to understand potential changes in stock value or company status.
Expand Down Expand Up @@ -144,47 +144,6 @@ def fetch_news(self, id_: int) -> News:
reason=item[11],
)

def analyze_symbol(self, symbol: str, news: List[News]) -> Analysis:
    """Aggregate one symbol's news items into a single ``Analysis`` record.

    Args:
        symbol: Ticker symbol the news items belong to.
        news: News items for ``symbol``. Must be non-empty, because the
            category is read from the first item.

    Returns:
        An ``Analysis`` stamped with today's date, holding the average
        sentiment, the positive/negative/neutral counts and the combined
        ``need_attention`` flag.

    Raises:
        IndexError: If ``news`` is empty.
    """
    # Neutral (0) scores are excluded from both the sum and the divisor, so
    # they no longer dilute the average. This mirrors the SQL aggregate
    # COALESCE(AVG(sentiment) FILTER (WHERE sentiment != 0), 0) used for the
    # daily update, including the fallback to 0 when nothing is scored.
    scored = [item.sentiment for item in news if item.sentiment != 0]
    average_sentiment = sum(scored) / len(scored) if scored else 0

    return Analysis(
        category=news[0].category,
        symbol=symbol,
        date=datetime.now().date().isoformat(),
        average_sentiment=average_sentiment,
        total_news=len(news),
        positive_news=sum(1 for item in news if item.sentiment > 0),
        negative_news=sum(1 for item in news if item.sentiment < 0),
        neutral_news=sum(1 for item in news if item.sentiment == 0),
        # Test the flag itself rather than the truthiness of News objects.
        need_attention=any(item.need_attention for item in news),
    )

def insert_analysis(self, analysis: List[Analysis]):
    """Upsert analysis records into ``news_analysis`` and commit.

    Each record becomes one row. On a ``(symbol, date)`` conflict the
    existing row's sentiment, counts and ``need_attention`` are overwritten
    with the new values.

    Args:
        analysis: Records to write; one parameter tuple is built per item.
    """
    # The SQL text is kept flush-left so the literal sent to the database
    # stays exactly as it was.
    upsert_sql = """
INSERT INTO news_analysis
(category, symbol, date, average_sentiment, total_news, positive_news, negative_news, neutral_news, need_attention)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (symbol, date) DO UPDATE SET average_sentiment = EXCLUDED.average_sentiment, total_news = EXCLUDED.total_news, positive_news = EXCLUDED.positive_news, negative_news = EXCLUDED.negative_news, neutral_news = EXCLUDED.neutral_news, need_attention = EXCLUDED.need_attention
"""
    # Column order here must match the column list in the INSERT above.
    rows = []
    for record in analysis:
        rows.append(
            (
                record.category,
                record.symbol,
                record.date,
                record.average_sentiment,
                record.total_news,
                record.positive_news,
                record.negative_news,
                record.neutral_news,
                record.need_attention,
            )
        )
    self.cursor.executemany(upsert_sql, rows)
    self.connection.commit()

def update_ai_news_analysis(self, news: List[News]):
self.cursor.executemany(
"""
Expand Down Expand Up @@ -261,7 +220,7 @@ def update_daily_analysis(self, news: News, date: str):
average_sentiment =
(
SELECT
AVG(sentiment)
COALESCE(AVG(sentiment) FILTER ( WHERE sentiment != 0 ), 0) AS avg_sentiment
FROM
news_news
WHERE
Expand Down Expand Up @@ -314,35 +273,6 @@ def analyze_news(self, ch, method, properties, body: str):
logger.info(f"Analysis Updated!")
ch.basic_ack(delivery_tag=method.delivery_tag)

def analyze(self):
    """Run the batch pipeline: fetch, AI-score, persist, then aggregate.

    Steps, in order:
      1. Fetch the items returned by ``get_today_news``.
      2. Group them by symbol.
      3. Call ``ai_analysis`` on every item and store the returned
         sentiment, need_attention flag and reason on the item.
      4. Persist the per-item results via ``update_ai_news_analysis``.
      5. Build one analysis per symbol and write them with
         ``insert_analysis``.
    """
    logger.info("Fetching all News...")
    fetched = self.get_today_news()
    logger.info(f"News Count: {len(fetched)}")

    # Bucket items per symbol; dict insertion order keeps first-seen order.
    by_symbol = {}
    for entry in fetched:
        by_symbol.setdefault(entry.symbol, []).append(entry)

    logger.info("News AI Analysis...")
    for symbol_news in by_symbol.values():
        for entry in symbol_news:
            # Items are mutated in place, so ``fetched`` sees the results too.
            entry.sentiment, entry.need_attention, entry.reason = self.ai_analysis(
                entry
            )

    logger.info("Updating AI Analysis...")
    self.update_ai_news_analysis(fetched)

    logger.info("Analyzing Each Symbol...")
    results = []
    for symbol, symbol_news in by_symbol.items():
        results.append(self.analyze_symbol(symbol, symbol_news))
    logger.info(f"Analysis: {results}")

    logger.info("Inserting Analysis...")
    self.insert_analysis(results)
    logger.info("Analysis Finished!")


if __name__ == "__main__":
analyzer = Analyzer()
Expand Down
91 changes: 0 additions & 91 deletions applications/data_analyzer/tests/analyzer_test.py
Original file line number Diff line number Diff line change
@@ -1,91 +0,0 @@
from datetime import datetime
from unittest.mock import patch

import pytest
from data_analyzer.analyzer import Analyzer
from data_analyzer.utils.models import Analysis, News

# Keyword arguments shared by both Apple fixtures. Only the id, the
# headline/summary text and the need_attention flag differ between them.
_APPLE_COMMON = dict(
    category="equity",
    symbol="AAPL",
    src="example",
    src_url="https://www.example.com",
    img_src_url="https://example.com/img.png",
    publish_time="2020-05-17T10:00:00.000Z",
    sentiment=1,
    reason=None,
)

# Positive AAPL item that does not need attention.
APPLE_NEWS_1 = News(
    id=1,
    headline="Apple launches new iPhone 12",
    summary="Apple launches new iPhone 12",
    need_attention=False,
    **_APPLE_COMMON,
)

# Positive AAPL item that is flagged as needing attention.
APPLE_NEWS_2 = News(
    id=2,
    headline="Apple launches new iPad 5",
    summary="Apple launches new iPad 5",
    need_attention=True,
    **_APPLE_COMMON,
)


@pytest.fixture()
def analyzer():
    """Yield an ``Analyzer`` created while ``psycopg.connect`` is patched.

    The mocked cursor's ``fetchone`` returns a single row whose values match
    ``APPLE_NEWS_1`` field for field.
    """
    stored_row = [
        1,
        "equity",
        "AAPL",
        "example",
        "https://www.example.com",
        "https://example.com/img.png",
        "Apple launches new iPhone 12",
        "Apple launches new iPhone 12",
        "2020-05-17T10:00:00.000Z",
        1,
        False,
        None,
    ]
    with patch("psycopg.connect") as fake_connect:
        fake_cursor = fake_connect.return_value.cursor.return_value
        fake_cursor.fetchone.return_value = stored_row
        # Yield inside the ``with`` so the patch stays active during the test.
        yield Analyzer()


class TestAnalyzer:
    """Unit tests for ``Analyzer``, run against the patched ``analyzer`` fixture."""

    @pytest.mark.parametrize(
        "symbol, news, expected",
        [
            (
                "AAPL",
                [
                    APPLE_NEWS_1,
                    APPLE_NEWS_2,
                ],
                Analysis(
                    category="equity",
                    symbol="AAPL",
                    date=datetime.now().date().isoformat(),
                    average_sentiment=1,
                    total_news=2,
                    positive_news=2,
                    negative_news=0,
                    neutral_news=0,
                    need_attention=True,
                ),
            ),
        ],
    )
    def test__analyze_symbol(self, analyzer, symbol, news, expected):
        """Two positive AAPL items aggregate into the expected ``Analysis``."""
        # Use the fixture rather than a bare ``Analyzer()``: the fixture builds
        # the instance while ``psycopg.connect`` is patched, so this test does
        # not bypass the mocked database.
        actual = analyzer.analyze_symbol(symbol, news)
        assert actual.__dict__ == expected.__dict__

    def test__fetch_news(self, analyzer):
        """``fetch_news`` maps the mocked cursor row onto a ``News`` object."""
        expected_news = APPLE_NEWS_1
        result = analyzer.fetch_news(1)
        assert result.__dict__ == expected_news.__dict__
28 changes: 28 additions & 0 deletions applications/data_analyzer/update_analysis.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- Recompute the daily aggregate row in news_analysis for one symbol and date
-- from the individual rows in news_news.
-- The symbol ('NFLX') and date ('2023-12-17') appear in BOTH the CTE filter
-- and the UPDATE filter; change them together.
WITH news_stats AS (
    SELECT
        -- Neutral (0) scores are excluded from the average; 0 when nothing is scored.
        COALESCE(AVG(sentiment) FILTER (WHERE sentiment != 0), 0) AS avg_sentiment,
        COUNT(*) AS total_news_count,
        COUNT(*) FILTER (WHERE sentiment > 0) AS positive_news_count,
        COUNT(*) FILTER (WHERE sentiment < 0) AS negative_news_count,
        COUNT(*) FILTER (WHERE sentiment = 0) AS neutral_news_count,
        -- BOOL_OR yields NULL when no rows match (or every flag is NULL);
        -- fall back to FALSE so need_attention is never overwritten with NULL.
        COALESCE(BOOL_OR(need_attention), FALSE) AS any_need_attention
    FROM
        news_news
    WHERE
        DATE(publish_time) = '2023-12-17'
        AND symbol = 'NFLX'
)
UPDATE
    news_analysis
SET
    average_sentiment = news_stats.avg_sentiment,
    total_news = news_stats.total_news_count,
    positive_news = news_stats.positive_news_count,
    negative_news = news_stats.negative_news_count,
    neutral_news = news_stats.neutral_news_count,
    need_attention = news_stats.any_need_attention
FROM
    news_stats
WHERE
    news_analysis.symbol = 'NFLX'
    AND news_analysis.date = '2023-12-17';

0 comments on commit e4afd3b

Please sign in to comment.