# Data analyzer (데이터 분석기)

import streamlit as st
from pymongo import MongoClient
import spacy
from collections import Counter
import matplotlib.pyplot as plt

# Create a client for the MongoDB server on localhost, default port 27017.
# NOTE(review): Streamlit presumably re-executes this script on each user
# interaction, which would build a new client per rerun — confirm whether the
# client should be cached.
client = MongoClient('mongodb://localhost:27017/')

# Database that holds this app's data.
db = client.get_database('mydatabase_spacy')

# Collection in which the user-submitted texts are stored.
collection = db.get_collection('user_text')

def save_text_data(text):
    """Store ``text`` as a new document in the module-level collection.

    The inserted document has a single ``'text'`` field holding the value
    passed in. Nothing is returned.
    """
    collection.insert_one({'text': text})

def analyze_text_data():
    """Count keyword lemmas across every text stored in the collection.

    Each stored text is run through the spaCy ``en_core_web_sm`` pipeline and
    the lemma of every alphabetic, non-stop-word token is counted.

    Documents that have no ``'text'`` field, or whose ``'text'`` value is not
    a non-blank string, are skipped instead of raising.

    Returns:
        collections.Counter: mapping of lemma -> number of occurrences. It is
        empty (falsy) when there is nothing to count.
    """
    # NOTE(review): the model is loaded on every call; if this function runs
    # on each Streamlit rerun, consider caching the loaded pipeline.
    nlp = spacy.load('en_core_web_sm')

    # Only the 'text' field is used, so ask the server for just that field.
    cursor = collection.find({}, {'text': 1})
    raw_texts = (record.get('text') for record in cursor)
    # Blank strings yield no tokens anyway; non-strings would make spaCy raise.
    usable_texts = (
        value for value in raw_texts
        if isinstance(value, str) and value.strip()
    )

    keyword_count = Counter()
    # nlp.pipe processes the texts as a stream in batches, and updating the
    # counter per document avoids holding every lemma in one big list.
    for doc in nlp.pipe(usable_texts):
        keyword_count.update(
            token.lemma_
            for token in doc
            if token.is_alpha and not token.is_stop
        )
    return keyword_count

# Text input section: the user types a text and stores it with the save button.
st.header('텍스트 데이터 분석')
text = st.text_area('텍스트 입력')
if st.button('저장'):
    if text.strip():
        save_text_data(text)
        st.success('텍스트가 저장되었습니다.')
    else:
        # Do not store empty / whitespace-only submissions.
        st.warning('저장할 텍스트를 입력해 주세요.')

# Analysis section: show the ten most frequent keywords as a bar chart.
st.header('텍스트 데이터 분석 결과')
keyword_count = analyze_text_data()
if keyword_count:
    st.subheader('키워드 빈도수')
    keyword_labels, keyword_values = zip(*keyword_count.most_common(10))
    # Draw on a dedicated figure instead of pyplot's global one: the global
    # figure is not cleared when passed to st.pyplot, so bars from earlier
    # script runs would otherwise accumulate on the same axes.
    fig, ax = plt.subplots()
    ax.bar(keyword_labels, keyword_values)
    ax.tick_params(axis='x', labelrotation=45)
    st.pyplot(fig)
    # Release the figure so repeated runs do not leak open figures.
    plt.close(fig)
else:
    st.write('분석할 텍스트 데이터가 없습니다.')