Refactor: predict and train refactoring

김진현 2024-09-25 13:55:44 +09:00
parent 20f8d9730a
commit 81b5145b41
6 changed files with 100 additions and 167 deletions

View File: detection route (predict / train)

@@ -1,17 +1,16 @@
 import json
-from fastapi import APIRouter, HTTPException, Response
+from fastapi import APIRouter, HTTPException
 from schemas.predict_request import PredictRequest
 from schemas.train_request import TrainRequest
 from schemas.predict_response import PredictResponse, LabelData
+from schemas.train_report_data import ReportData
 from services.load_model import load_detection_model
 from services.create_model import save_model
 from utils.dataset_utils import split_data
-from utils.file_utils import get_dataset_root_path, process_directories, process_image_and_label, join_path, get_model_path
-from utils.websocket_utils import WebSocketClient, WebSocketConnectionException
+from utils.file_utils import get_dataset_root_path, process_directories, process_image_and_label, join_path
 from utils.slackMessage import send_slack_message
 import asyncio
-import time

 router = APIRouter()
@@ -21,32 +20,20 @@ async def detection_predict(request: PredictRequest):
     send_slack_message(f"predict 요청{request}", status="success")

-    # WebSocket URL of the Spring server
-    # TODO: change when deploying
-    # spring_server_ws_url = f"ws://localhost:8080/ws"
-    # Create a WebSocketClient instance
-    # ws_client = WebSocketClient(spring_server_ws_url)
     # Load the model
-    model = load_model(request)
+    model = get_model(request)

     # Link the model's label categories
     classes = list(request.label_map) if request.label_map else None

-    # List to store the results
-    response = []
+    # Organize the image data
+    url_list = list(map(lambda x:x.image_url, request.image_list))

-    # Connect the WebSocket
-    # await connect_to_websocket(ws_client)
     # Inference
     try:
-        for idx, image in enumerate(request.image_list):
-            result = run_predictions(model, image, request, classes)
-            response_item = process_prediction_result(result, request, image)
-            response.append(response_item)
+        results = run_predictions(model, url_list, request, classes)
+        response = [process_prediction_result(result, image, request.label_map) for result, image in zip(results, request.image_list)]
         return response

     except Exception as e:
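The batched predict above leans on the fact that an Ultralytics YOLO model accepts a list of sources and returns one result per image. A minimal sketch of that pattern, outside the context of this commit (the weight file and URLs below are placeholders):

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")                      # placeholder detection weights
    url_list = [
        "https://example.com/cat.jpg",              # placeholder image URLs
        "https://example.com/dog.jpg",
    ]

    # One predict() call handles the whole list, so the old per-image loop
    # collapses into a single call plus a zip over the results.
    results = model.predict(source=url_list, iou=0.5, conf=0.25)
    for result, url in zip(results, url_list):
        print(url, len(result.boxes))               # detected boxes per image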
@@ -54,43 +41,29 @@ async def detection_predict(request: PredictRequest):
         send_slack_message(f"프로젝트 ID: {request.project_id} - 실패! 에러: {str(e)}",status="error")
         raise HTTPException(status_code=500, detail="Prediction process failed")

-    # finally:
-    #     send_slack_message("종료")
-    #     if ws_client.is_connected():
-    #         await ws_client.close()

 # Load the model
-def load_model(request: PredictRequest):
+def get_model(request: PredictRequest):
     try:
-        model_path = request.m_key and get_model_path(request.project_id, request.m_key)
-        return load_detection_model(model_path=model_path)
+        return load_detection_model(request.project_id, request.m_key)
     except Exception as e:
         raise HTTPException(status_code=500, detail="load model exception: " + str(e))

-# Connect the WebSocket
-async def connect_to_websocket(ws_client):
-    try:
-        await ws_client.connect()
-        if not ws_client.is_connected():
-            raise WebSocketConnectionException("웹 소켓 연결 실패")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail="websocket connect failed: " + str(e))

 # Run inference
 def run_predictions(model, image, request, classes):
     try:
         predict_results = model.predict(
-            source=image.image_url,
+            source=image,
             iou=request.iou_threshold,
             conf=request.conf_threshold,
             classes=classes
         )
         return predict_results[0]
     except Exception as e:
         raise HTTPException(status_code=500, detail="model predict exception: " + str(e))

 # Process a single inference result
-def process_prediction_result(result, request, image):
+def process_prediction_result(result, image, label_map):
     label_data = LabelData(
         version="0.0.0",
         task_type="det",
@@ -102,7 +75,7 @@ def process_prediction_result(result, request, image):
                 [summary['box']['x1'], summary['box']['y1']],
                 [summary['box']['x2'], summary['box']['y2']]
             ],
-            "group_id": request.label_map[summary['class']] if request.label_map else summary['class'],
+            "group_id": label_map[summary['class']] if label_map else summary['class'],
             "shape_type": "rectangle",
             "flags": {}
         }
@@ -119,24 +92,18 @@ def process_prediction_result(result, request, image):
         data=json.dumps(label_data.dict())
     )

 @router.post("/train")
 async def detection_train(request: TrainRequest):
-    # WebSocket URL of the Spring server
-    # TODO: change when deploying
-    spring_server_ws_url = f"ws://localhost:8080/ws"
-    # Create a WebSocketClient instance
-    ws_client = WebSocketClient(spring_server_ws_url)
+    send_slack_message(f"train 요청{request}", status="success")

     # Get the dataset root path
     dataset_root_path = get_dataset_root_path(request.project_id)

     # Load the model
-    try:
-        model_path = request.m_key and get_model_path(request.project_id, request.m_key)
-        model = load_detection_model(model_path=model_path)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail="load model exception: " + str(e))
+    model = get_model(request)

     # Organize the model's categories so new ones can be added later
     names = model.names
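For context, model.names on an Ultralytics YOLO model is a dict mapping class index to class name, which is what the directory and dataset setup below consumes. A tiny illustration (the weight file is a placeholder):

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")                 # placeholder weights
    print(type(model.names))                   # <class 'dict'>
    print(list(model.names.items())[:3])       # e.g. [(0, 'person'), (1, 'bicycle'), (2, 'car')]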
@@ -145,41 +112,19 @@ async def detection_train(request: TrainRequest):
     process_directories(dataset_root_path, names)

     # Label map
-    inverted_label_map = None
-    if request.label_map:
-        inverted_label_map = {value: key for key, value in request.label_map.items()}
+    inverted_label_map = {value: key for key, value in request.label_map.items()} if request.label_map else None

     # Split into train/validation data
     train_data, val_data = split_data(request.data, request.ratio)

     try:
-        await ws_client.connect()
-        if not ws_client.is_connected():
-            raise WebSocketConnectionException()

         # Process the training data
-        total_data = len(train_data)
-        for idx, data in enumerate(train_data):
-            # TODO: await this if it becomes asynchronous
-            # process_image_and_label(data, dataset_root_path, "train")
-            # Progress calculation
-            progress = (idx + 1) / total_data * 100
-            await ws_client.send_message("/app/ai/train/progress", f"학습 데이터 처리 중 {request.project_id}: {progress:.2f}% 완료")
+        for data in train_data:
+            process_image_and_label(data, dataset_root_path, "train", inverted_label_map)

         # Process the validation data
-        total_val_data = len(val_data)
-        for idx, data in enumerate(val_data):
-            # TODO: await this if it becomes asynchronous
-            # process_image_and_label(data, dataset_root_path, "val")
-            # Progress calculation
-            progress = (idx + 1) / total_val_data * 100
-            # Send a message over the WebSocket (add if needed)
-            await ws_client.send_message("/app/ai/val/progress", f"검증 데이터 처리 중 {request.project_id}: {progress:.2f}% 완료")
+        for data in val_data:
+            process_image_and_label(data, dataset_root_path, "val", inverted_label_map)

-        from ultralytics.models.yolo.detect import DetectionTrainer

         def send_data(trainer):
             # Skip the first epoch
@@ -188,52 +133,23 @@ async def detection_train(request: TrainRequest):
             ## Remaining time estimate (seconds)
             left_epochs = trainer.epochs-trainer.epoch
-            left_sec = left_epochs*trainer.epoch_time
+            left_seconds = left_epochs*trainer.epoch_time

             ## Losses: box_loss, cls_loss, dfl_loss
             loss = trainer.label_loss_items(loss_items=trainer.loss_items)
-            data = {
-                "epoch": trainer.epoch,             # current epoch
-                "total_epochs": trainer.epochs,     # total epochs
-                "box_loss": loss["box_loss"],       # box loss
-                "cls_loss": loss["cls_loss"],       # cls loss
-                "dfl_loss": loss["dfl_loss"],       # dfl loss
-                "fitness": trainer.fitness,         # fitness
-                "epoch_time": trainer.epoch_time,   # time taken by the last epoch (measured from its start)
-                "left_second": left_sec             # remaining time (seconds)
-            }
+            data = ReportData(
+                epoch=trainer.epoch,                # current epoch
+                total_epochs=trainer.epochs,        # total epochs
+                box_loss=loss["train/box_loss"],    # box loss
+                cls_loss=loss["train/cls_loss"],    # cls loss
+                dfl_loss=loss["train/dfl_loss"],    # dfl loss
+                fitness=trainer.fitness,            # fitness
+                epoch_time=trainer.epoch_time,      # time taken by the last epoch (measured from its start)
+                left_seconds=left_seconds           # remaining time (seconds)
+            )

             # Send the data
-            ws_client.send_message()

         model.add_callback("on_train_epoch_start", send_data)

-        model.train(
-            data=join_path(dataset_root_path, "dataset.yaml"),
-            name=join_path(dataset_root_path, "result"),
-            epochs=request.epochs,
-            batch=request.batch,
-            lr0=request.lr0,
-            lrf=request.lrf,
-            optimizer=request.optimizer
-        )
-        model_key = save_model(project_id=request.project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
-        return {"model_key": model_key, "results": results.results_dict}

-    except WebSocketConnectionException as e:
-        # Process the training data
-        total_data = len(train_data)
-        for idx, data in enumerate(train_data):
-            # TODO: await this if it becomes asynchronous
-            process_image_and_label(data, dataset_root_path, "train", inverted_label_map)

-        # Process the validation data
-        total_val_data = len(val_data)
-        for idx, data in enumerate(val_data):
-            # TODO: await this if it becomes asynchronous
-            process_image_and_label(data, dataset_root_path, "val", inverted_label_map)

         results = model.train(
             data=join_path(dataset_root_path, "dataset.yaml"),
             name=join_path(dataset_root_path, "result"),
@@ -247,14 +163,8 @@ async def detection_train(request: TrainRequest):
         model_key = save_model(project_id=request.project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
         return {"model_key": model_key, "results": results.results_dict}

     except Exception as e:
-        print(f"Training process failed: {str(e)}")
-        raise HTTPException(status_code=500, detail="Training process failed")
-    finally:
-        if ws_client.is_connected():
-            await ws_client.close()
+        raise HTTPException(status_code=500, detail="model train exception: " + str(e))
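The epoch-report callback above relies on Ultralytics' training callback hooks. A rough, self-contained sketch of the same pattern, assuming Ultralytics YOLOv8; the weights, the dataset, and the epoch == 0 check (one way to implement the "skip the first epoch" note) are placeholders, not code from this commit:

    from ultralytics import YOLO

    def report_progress(trainer):
        if trainer.epoch == 0:                  # assumed way to skip the first epoch
            return
        loss = trainer.label_loss_items(loss_items=trainer.loss_items)
        left_seconds = (trainer.epochs - trainer.epoch) * trainer.epoch_time
        print(trainer.epoch, trainer.epochs, loss.get("train/box_loss"), left_seconds)

    model = YOLO("yolov8n.pt")                              # placeholder weights
    model.add_callback("on_train_epoch_start", report_progress)
    model.train(data="coco8.yaml", epochs=3)                # placeholder dataset and epoch count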

View File: segmentation route (predict)

@@ -2,7 +2,6 @@ from fastapi import APIRouter, HTTPException
 from schemas.predict_request import PredictRequest
 from schemas.predict_response import PredictResponse, LabelData
 from services.load_model import load_segmentation_model
-from utils.file_utils import get_model_path
 from typing import List

 router = APIRouter()
@@ -12,8 +11,7 @@ def predict(request: PredictRequest):
     # Load the model
     try:
-        model_path = request.m_key and get_model_path(request.project_id, request.m_key)
-        model = load_segmentation_model(model_path)
+        model = load_segmentation_model()
     except Exception as e:
         raise HTTPException(status_code=500, detail="load model exception: "+str(e))

View File: schemas/train_report_data.py (new file)

@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+
+class ReportData(BaseModel):
+    epoch: int            # current epoch
+    total_epochs: int     # total epochs
+    box_loss: float       # box loss
+    cls_loss: float       # cls loss
+    dfl_loss: float       # dfl loss
+    fitness: float        # fitness
+    epoch_time: float     # time taken by the last epoch (measured from its start)
+    left_seconds: float   # remaining time (seconds)
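A quick sketch of how this schema can be filled and serialized for reporting; the numbers are made up, and .dict() assumes the pydantic v1-style API already used elsewhere in this diff:

    import json
    from schemas.train_report_data import ReportData

    report = ReportData(
        epoch=2, total_epochs=10,
        box_loss=0.41, cls_loss=0.22, dfl_loss=0.95,
        fitness=0.37, epoch_time=12.8, left_seconds=102.4,
    )
    print(json.dumps(report.dict()))    # ready to hand off to whatever reporting channel is used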

View File: schemas/train_request.py

@@ -4,11 +4,12 @@ from schemas.predict_response import LabelData
 class TrainDataInfo(BaseModel):
     image_url: str
-    label: LabelData
+    label: str

 class TrainRequest(BaseModel):
     project_id: int
     m_key: Optional[str] = Field(None, alias="model_key")
+    m_id: Optional[str] = Field(None, alias="model_id")  # model_id is sent along with per-epoch results during training
     label_map: dict[int, int] = Field(None, description="모델 레이블 카테고리 idx: 프로젝트 레이블 카테고리 idx , None 일경우 레이블 데이터(프로젝트 레이블)의 idx로 학습")
     data: List[TrainDataInfo]
     ratio: float = 0.8  # train/validation split ratio
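For illustration, the shape of the data this schema now expects: label is a serialized LabelData JSON string rather than a nested object, and m_key / m_id are populated through their model_key / model_id aliases. All values below are invented; "yolo8" is the default key used by load_model.py:

    from schemas.train_request import TrainDataInfo

    # One element of the "data" list: the label is a JSON string after this commit.
    item = TrainDataInfo(
        image_url="https://example.com/img_1.jpg",    # invented URL
        label='{"version": "0.0.0", "task_type": "det", "shapes": []}',
    )
    print(item.label[:20])

    # On the wire, the m_key / m_id fields are sent under their aliases:
    payload_fragment = {"project_id": 1, "model_key": "yolo8", "model_id": "42"}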

View File: services/load_model.py

@@ -5,35 +5,27 @@ from ultralytics.models.yolo.model import YOLO as YOLO_Model
 from ultralytics.nn.tasks import DetectionModel, SegmentationModel
 import os
 import torch
-import re

-def load_detection_model(model_path:str|None):
-    """
-    Load a YOLO model from the given path.
-
-    Args:
-        model_path (str): path to the model file.
-        device (str): device to load the model on. Defaults to 'cpu'.
-            A device such as 'cpu' or 'cuda' can be specified.
-
-    Returns:
-        YOLO: the loaded YOLO model instance
-    """
-    if model_path:
-        model = load_model(model_path)
+def load_detection_model(project_id:int, model_key:str):
+    default_model_map = {"yolo8": os.path.join("resources","models","yolov8n.pt")}
+    # Check for a default model
+    if model_key in default_model_map:
+        model = YOLO(default_model_map[model_key])
     else:
-        model = YOLO(os.path.join("resources","models","yolov8n.pt"))
+        model = load_model(model_path=os.path.join("projects",str(project_id),"models",model_key))

     # Verify that this is a detection model
     if model.task != "detect":
         raise TypeError(f"Invalid model type: {model.task}. Expected a DetectionModel.")
     return model

-def load_segmentation_model(model_path: str|None):
-    if model_path:
-        model = YOLO(model_path)
+def load_segmentation_model(project_id:int, model_key:str):
+    default_model_map = {"yolo8": os.path.join("resources","models","yolov8n-seg.pt")}
+    # Check for a default model
+    if model_key in default_model_map:
+        model = YOLO(default_model_map[model_key])
     else:
-        model = YOLO(os.path.join("resources","models","yolov8n-seg.pt"))
+        model = load_model(model_path=os.path.join("projects",str(project_id),"models",model_key))

     # Verify that this is a segmentation model
     if model.task != "segment":
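Usage sketch for the key-based loaders above: the "yolo8" key resolves to the bundled default weights, while any other key is resolved under projects/<project_id>/models/. The project id and the .pt key below are invented, and the second call assumes that file already exists on disk:

    from services.load_model import load_detection_model

    base = load_detection_model(project_id=3, model_key="yolo8")        # bundled yolov8n.pt
    tuned = load_detection_model(project_id=3, model_key="abc123.pt")   # projects/3/models/abc123.pt
    print(base.task, tuned.task)    # both must be "detect", otherwise a TypeError is raised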

View File: utils/file_utils.py

@@ -3,6 +3,9 @@ import shutil
 import yaml
 from PIL import Image
 from schemas.train_request import TrainDataInfo
+from schemas.predict_response import LabelData
+import urllib
+import json

 def get_dataset_root_path(project_id):
     """Return the absolute path of the dataset root"""
@@ -39,18 +42,28 @@ def process_directories(dataset_root_path:str, names:list[str]):
 def process_image_and_label(data:TrainDataInfo, dataset_root_path:str, child_path:str, label_map:dict[int, int]|None):
     """Save the image and create its label file"""
-    # Save the image
-    img = Image.open(data.image_url)
-    # Strip the extension from the file name to get img_title and img_ext
-    img_title, img_ext = os.path.splitext(os.path.basename(data.image_url))
-    # Save the image file (keeping its original extension)
-    img.save(os.path.join(dataset_root_path, child_path, img_title + img_ext))
-    # Parse the label into training label data (detection)
-    label = data.label
-    with open(os.path.join(dataset_root_path, child_path, f"{img_title}.txt"), "w") as train_label_txt:
+    # Split the file name out of the image URL
+    img_name = data.image_url.split('/')[-1]
+    img_path = os.path.join(dataset_root_path,child_path,img_name)
+
+    # Download the image from the URL
+    urllib.request.urlretrieve(data.image_url, img_path)
+
+    # Strip the extension from the file name to get img_title
+    img_title = os.path.splitext(os.path.basename(img_path))[0]
+
+    # Label file path
+    label_path = os.path.join(dataset_root_path, child_path, f"{img_title}.txt")
+
+    # Deserialize the label
+    label = json_to_object(data.label)
+
+    # Parse the label into training label data and write the file
+    create_detection_train_label(label, label_path, label_map)
+
+def create_detection_train_label(label:LabelData, label_path:str, label_map:dict[int, int]|None):
+    with open(label_path, "w") as train_label_txt:
         for shape in label.shapes:
             train_label = []
             x1 = shape.points[0][0]
@@ -93,5 +106,12 @@ def get_file_name(path):
         raise FileNotFoundError()
     return os.path.basename(path)

-def get_model_path(project_id:int, model_key:str):
-    return os.path.join("resources", "projects", str(project_id), "models", model_key)
+def json_to_object(json_string):
+    try:
+        # Convert the JSON string into a Python object
+        python_object = json.loads(json_string)
+        return python_object
+    except json.JSONDecodeError as e:
+        raise json.JSONDecodeError("json_decode_error:"+str(e))
+    except Exception as e:
+        raise Exception("exception at json_to_object:"+str(e))
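The body of create_detection_train_label is cut off by the hunk above; for orientation, the conversion such a label writer typically performs is the standard YOLO text format, one "class cx cy w h" line per box with coordinates normalized by the image size. An illustrative sketch, not code from this commit:

    # Convert an (x1, y1, x2, y2) box in pixels into a normalized YOLO label line.
    def to_yolo_line(class_idx, x1, y1, x2, y2, img_w, img_h):
        cx = (x1 + x2) / 2 / img_w   # box center x, normalized
        cy = (y1 + y2) / 2 / img_h   # box center y, normalized
        w = (x2 - x1) / img_w        # box width, normalized
        h = (y2 - y1) / img_h        # box height, normalized
        return f"{class_idx} {cx} {cy} {w} {h}"

    print(to_yolo_line(0, 10, 20, 110, 220, 640, 480))  # "0 0.09375 0.25 0.15625 0.416..."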