KT AIVLE SCHOOL

KT AICE Asso 자격증을 위한 코드 정리

AICE ASSO

라이브러리 호출

# Scikit Learn : 머신러닝 학습을 위한 최고의 파이썬 패키지

import sklearn as sk

# Pandas : 데이터 처리를 위한 파이썬 패키지

import pandas as pd

# Numpy : 행렬 연산을 위한 파이썬 패키지

import numpy as np

# Matplotlib : 데이터 시각화를 위한 파이썬 패키지

import matplotlib.pyplot as plt

# Seaborn : 데이터 시각화를 위한 파이썬 패키지

import seaborn as sns

데이터 호출

# CSV 파일

## index_col : 특정 열을 인덱스로 지정

## header : 헤더(컬럼) 행 설정 -> 주로 데이터의 첫 줄이 헤더가 아닌 경우에 사용

## encoding : 파일 인코딩 형식 지정하여 호출

df = pd.read_csv('example.csv', index_col=0, header=None, encoding='utf-8')

# excel 파일

## header : 헤더(컬럼) 행 설정

## index_col : 특정 열을 인덱스로 지정

## sheet_name : 파일 시트 지정하여 호출

df = pd.read_excel('example.xlsx', index_col=0, header=None)

AICE ASSO 1

# JSON 파일

## orient : 특정 객체로 직접 변환

df = pd.read_json('example.json', orient='records')

# SQL 데이터베이스 파일

## connection : 연결한 데이터베이스의 테이블

## index_col : 특정 열을 인덱스로 지정

import sqlite3

connection = sqlite3.connect('example.db')

df = pd.read_sql_query("SELECT * FROM example_table", connection)

# HTML 테이블 파일

## 참고 : read_html은 페이지 내 테이블들을 DataFrame의 리스트로 반환 (특정 테이블은 인덱싱으로 선택)

df = pd.read_html('http://example.com', header=1)

데이터 전처리

# 데이터 타입 변경

df = df.astype({'col': 'dtype'}) # int, float, ...

# 데이터 값 변경

df.replace({'col' : {'value1': 'value2'}}, inplace = True)

# 중복 데이터 제거

df = df.drop_duplicates()

# 데이터 결측치 및 이상치 확인 및 처리

df.isna().sum() # 결측치 확인

df = df.dropna(axis=0) # 결측행 삭제

## 특정 값 이상의 데이터 삭제

df = df.drop(df[df['col'] > 'value'].index)

# df = df[df['Speed_Per_Hour'] < 300]

# 데이터 삭제

## Axis : 1이면 컬럼, 0이면 행 삭제

df.drop(['col'], axis=1, inplace=True)

AICE ASSO 2

# Label Encoding

from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()

cat_cols = df.select_dtypes(include='object').columns

for col in cat_cols:
    df[col] = enc.fit_transform(df[col])

# One-Hot Encoding

## Pandas get_dummies()

### columns : 변환하고자 하는 컬럼 선택

### dummy_na : Nan을 생성하여 결측값도 인코딩 처리

### prefix : 생성할 dummy variable의 column 이름 앞에 붙을 prefix 지정

### prefix_sep : prefix와 범주 사이의 separate 지정(default : _)

### drop_first : k개의 범주에서 k-1개로 dummy variable을 만들 때 사용

df = pd.get_dummies(df, columns=[])

## Scikit Learn OneHotEncoder

from sklearn.preprocessing import OneHotEncoder

### 참고 : scikit-learn 1.2 이상에서는 sparse 대신 sparse_output=False 사용

enc = OneHotEncoder(sparse=False)

df_enc = enc.fit_transform(df)

## Keras to_categorical -> 단, 데이터가 정수형이어야만 한다.

df_enc = to_categorical(df)

# Standard Scaler

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_sc = scaler.fit_transform(X_train)

X_valid_sc = scaler.transform(X_valid)

# Min-Max Scaler

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

X_train_sc = scaler.fit_transform(X_train)

X_valid_sc = scaler.transform(X_valid)

데이터 시각화

AICE ASSO 3

TIP! matplotlib 및 seaborn의 공식 홈페이지에서 파라미터 확인! (정말 생소한 파라미터가 출제될 수도 있다!)

코드

# Bar

## pandas

df['col'].value_counts().plot(kind='bar')

# Line

## pandas

df['col'].value_counts().plot(kind='line')

# Box

## pandas

df['col'].value_counts().plot(kind='box')

# Pie

## pandas

df['col'].value_counts().plot(kind='pie')

# Scatter

## pandas

### scatter는 x, y 두 컬럼이 필요하므로 DataFrame에서 호출

df.plot(kind='scatter', x='col1', y='col2')

# Joint

## seaborn

sns.jointplot(data=df, x='col', y='col', kind='scatter')

# Hist

## seaborn

sns.histplot(data=df, x='col')

# KDE

## seaborn

sns.kdeplot(data=df, x='col')

# Count

sns.countplot(data=df, x='col')

AICE ASSO 4

# Heatmap

sns.heatmap(data=df.corr(), annot=True)

데이터 분할

# 라이브러리 호출

from sklearn.model_selection import train_test_split

target = 'Target'

X = df_preset.drop([target], axis=1)

y = df_preset[target]

## test_size : Test 데이터로 가져가고자 하는 비율

## random_state : 랜덤 시드 값

## stratify : 지정한 데이터에 대해서 비율에 맞게 나누는 옵션

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

ML 모델링

TIP! 모델의 파라미터는 scikit learn 공식 문서를 참고할 것!

Classification

# 분류

## Logistic Regression

from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

## KNeighbors Classifier

from sklearn.neighbors import KNeighborsClassifier

AICE ASSO 5

model = KNeighborsClassifier(n_neighbors=5)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

## Decision Tree Classifier

from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(max_depth=10, random_state=42)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

## Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=3, random_state=42)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

## XGBoost

from xgboost import XGBClassifier

model = XGBClassifier(n_estimators=3, random_state=42)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

## LightGBM

# 여기에 답안코드를 작성하세요.(lightgbm)

from lightgbm import LGBMClassifier

model = LGBMClassifier(n_estimators=3, random_state=42)

model.fit(X_train, y_train.values.ravel())

score = model.score(X_valid, y_valid)

# 분류 평가

from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(y_valid, y_pred)

plt.figure(figsize=(8, 6))

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)

AICE ASSO 6

plt.title(f'Confusion Matrix')

plt.xlabel('Predicted Label')

plt.ylabel('True Label')

plt.show()

print(classification_report(y_valid, y_pred))

Regression

# 회귀

## Linear Regression

from sklearn.linear_model import LinearRegression

model = LinearRegression()

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

y_pred = model.predict(X_valid)

## Decision Tree Regression

from sklearn.tree import DecisionTreeRegressor

model = DecisionTreeRegressor(max_depth=5, random_state=42)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

y_pred = model.predict(X_valid)

## Random Forest

from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(max_depth=12, n_estimators=100)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

y_pred = model.predict(X_valid)

## XGBoost

from xgboost import XGBRegressor

AICE ASSO 7

model = XGBRegressor(n_estimators=100)

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

y_pred = model.predict(X_valid)

## LightGBM

from lightgbm import LGBMRegressor

model = LGBMRegressor()

model.fit(X_train, y_train)

score = model.score(X_valid, y_valid)

y_pred = model.predict(X_valid)

# 회귀 평가

from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(y_valid, y_pred)

mse = mean_squared_error(y_valid, y_pred)

DL 모델링

TIP! 문제의 형태에 따른 출력층의 활성화 함수 설정!

회귀 : linear

이진 분류 : sigmoid

다중 분류 : softmax

TIP! 문제의 유형과 타겟의 형태에 따른 손실함수 설정!

회귀 : mse

이진 분류 : binary_crossentropy

다중 분류(원핫) : categorical_crossentropy

다중 분류(정수) : sparse_categorical_crossentropy

import tensorflow as tf

from tensorflow.keras.models import Sequential, load_model

from tensorflow.keras.layers import Input, Dense, Activation, Dropout, BatchNormalization

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

AICE ASSO 8

from tensorflow.keras.utils import to_categorical

tf.random.set_seed(1)

# DL 모델 구성

model = Sequential([

Input(shape=(X_train.shape[1],)),

Dense(64),

BatchNormalization(),

Activation('relu'),

Dense(128),

BatchNormalization(),

Activation('relu'),

Dropout(0.2),

Dense(128),

BatchNormalization(),

Activation('relu'),

Dense(1)

])

model.compile(optimizer='adam', loss='mae', metrics=['mae', 'mse'])

# 콜백함수 정의

es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)

mc = ModelCheckpoint('navi_model_cp.keras', monitor='val_loss',
                     verbose=1, save_best_only=True)

# 모델 학습하며, 로그를 history에 저장

history = model.fit(X_train, y_train,

epochs=30,

batch_size=16,

validation_split=.2,

callbacks=[es, mc])

# 모델 저장

model.save('navi_model.h5')

AICE ASSO 9

# 그래프를 그리기 위한 데이터 추출

epochs = range(1, len(history.history['mse']) + 1)

train_mse = history.history['mse']

validation_mse = history.history['val_mse']

# MSE 그래프 그리기

plt.figure(figsize=(8, 5))

plt.plot(epochs, train_mse, 'bo-', label='mse') # 'bo-'는 파란색 원형 마커와 실선

plt.plot(epochs, validation_mse, 'go-', label='val_mse') # 'go-'는 초록색 원형 마커와 실선

plt.title('Model MSE')

plt.xlabel('Epochs')

plt.ylabel('MSE')

plt.legend()

plt.grid(True)

plt.show()

AICE ASSO 10

'KT AIVLE SCHOOL' 카테고리의 다른 글

7차 미니프로젝트 1 - 클라우드 운영 환경 구축– 장고 배포 후 서비스(prod) (0)	2024.06.05
LangChain 기초 정리 (2)	2024.06.04
RAG - 모델이 질문과 모델의 답변을 기억하기 및 이어지는 질문과 답변하기 (0)	2024.06.04
RAG - 외부 VectorDB와 LLM 연결하기 (0)	2024.06.04
ChatGPT(LLM)와 RAG를 이용하여 '예비 KT 에이블러들을 위한 QA 챗봇 모델' 만들기 (2)	2024.06.04

Contents

새소식

인기 검색어

KT AICE Asso 자격증을 위한 코드 정리

'KT AIVLE SCHOOL' 카테고리의 다른 글

당신이 좋아할만한 콘텐츠

티스토리툴바