# summarizer.py (2.44 kB)
import os
import sys
import asyncio
from tqdm import tqdm
from agents import Runner
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from ingest.constants import PATH_DATA
from ingest.oai_agents import DataPrepAgent
from ingest.helper import get_sec_filings_files, get_sec_filings_file_content, write_summary
class Summarizer:
    """Two-pass filing summarizer: one agent drafts, a second agent verifies."""

    def __init__(self):
        # The DataPrepAgent instance exposes the `summarizer_agent` and
        # `verifier_agent` attributes used by `summarize`.
        self.data_prep_agent = DataPrepAgent()

    async def summarize(self, data, company, filing_type, filing_date):
        """Draft a summary of one filing, then pass it through the verifier.

        Args:
            data: Filing content; interpolated into both prompts.
            company: Value placed in the ``Company`` prompt field.
            filing_type: Value placed in the ``Report Type`` prompt field.
            filing_date: Value placed in the ``Filing Date`` prompt field.

        Returns:
            The ``final_output`` of the verifier agent's run.
        """
        # Pass 1: ask the summarizer agent for a draft.
        drafting_agent = self.data_prep_agent.summarizer_agent
        draft_prompt = (
            f"\nCompany: {company}\n"
            f"Report Type: {filing_type}\n"
            f"Filing Date: {filing_date}\n"
            f"Data: {data}\n"
        )
        draft = await Runner.run(drafting_agent, input=draft_prompt)

        # Pass 2: the verifier receives the draft together with the same
        # source data the draft was produced from.
        checking_agent = self.data_prep_agent.verifier_agent
        review_prompt = (
            f"\nCompany: {company}\n"
            f"Report Type: {filing_type}\n"
            f"Filing Date: {filing_date}\n"
            f"Summary: {draft.final_output}\n"
            f"Data: {data}\n"
        )
        verified = await Runner.run(checking_agent, input=review_prompt)
        return verified.final_output
class Data:
    """Summarize a single SEC filing file and write the summary next to it."""

    def __init__(self):
        self.summarizer = Summarizer()

    async def run(self, file_name):
        """Summarize the filing at ``PATH_DATA/<company>/<file_name>``.

        ``file_name`` is parsed as underscore-separated fields: the first
        field is the company, the second is the form code, and the last
        field (up to its first dot) is the filing date. The summary is
        written to the same directory as ``<stem>_summary.txt``.

        Args:
            file_name: Base name of the filing file (no directory part).

        Raises:
            ValueError: If ``file_name`` has fewer than two
                underscore-separated fields.
        """
        fields = file_name.split('_')
        if len(fields) < 2:
            raise ValueError(
                f'Unexpected filing file name {file_name!r}; '
                'expected "<company>_<form>_..._<date>.<ext>"'
            )

        company = fields[0]
        # Drop any extension so a two-field name such as "X_10K.txt" still
        # yields the bare form code.
        form_code = fields[1].split('.')[0]
        # A form code ending in "K" (10K, 10-K, 10k) is treated as the annual
        # report; anything else is treated as quarterly.
        file_type = 'yearly' if form_code.upper().endswith('K') else 'quarterly'
        filing_date = fields[-1].split('.')[0]

        data = get_sec_filings_file_content(os.path.join(PATH_DATA, company, file_name))
        summary = await self.summarizer.summarize(data, company, file_type, filing_date)

        # Build the output name from the stem so it can never equal the input
        # name (str.replace was a no-op for names without ".txt", which made
        # the summary overwrite the source filing).
        stem, _ext = os.path.splitext(file_name)
        write_summary(summary, os.path.join(PATH_DATA, company, f'{stem}_summary.txt'))
async def process_file(data, file_path):
    """Process one filing, identified by its path.

    Args:
        data: Object exposing an awaitable ``run(file_name)`` method.
        file_path: Path to the filing; only its final component is passed on.
    """
    # os.path.basename honours the platform's path separators, unlike a
    # hard-coded split on '/'.
    file_name = os.path.basename(file_path)
    await data.run(file_name)
async def process_ticker(ticker, data):
    """Process every filing file of one ticker concurrently.

    Args:
        ticker: Symbol handed to ``get_sec_filings_files`` to list its files.
        data: Object forwarded to ``process_file`` for each listed file.
    """
    print(f'Processing {ticker}...')
    filing_paths = get_sec_filings_files(ticker)
    # All coroutines are created up front and awaited together; the first
    # exception raised by any of them propagates out of gather().
    await asyncio.gather(*(process_file(data, path) for path in filing_paths))
    print(f'Completed {ticker} - {len(filing_paths)} files')
async def main():
    """Run the summarization pipeline for the fixed list of tickers."""
    # A single Data instance is shared by every ticker's tasks.
    shared = Data()
    symbols = ['AAPL', 'AMZN', 'FL', 'KO', 'META', 'MSFT', 'NVDA', 'TSLA']
    await asyncio.gather(*(process_ticker(symbol, shared) for symbol in symbols))
# Script entry point: start the async pipeline only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    asyncio.run(main())