Merge branch 'ai/refactor/train' into 'ai/develop'

Refactor: Segmentation 로깅 처리

See merge request s11-s-project/S11P21S002!289
2024-10-04 14:52:19 +09:00 · 2024-10-04 14:52:19 +09:00 · fbc2c1b46a
commit fbc2c1b46a
parent 4afe688f58 0871b1e951
2 changed files with 112 additions and 64 deletions
--- a/ai/app/api/yolo/detection.py
+++ b/ai/app/api/yolo/detection.py
@ -20,23 +20,39 @@ router = APIRouter()

@router.post("/predict")
 async def detection_predict(request: PredictRequest):
-    send_slack_message(f"predict 요청: {request}", status="success")
+    project_id = request.project_id
+    send_slack_message(f"Detection predict 요청 (projectId: {project_id})", status="success")

    # 모델 로드
+    start_time = time.time()
+    send_slack_message(f"모델 로드 중 (projectId: {project_id})...", status="success")
    model = get_model(request.project_id, request.m_key)
+    send_slack_message(f"모델 로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # 이미지 데이터 정리
-    url_list = list(map(lambda x:x.image_url, request.image_list))
+    start_time = time.time()
+    url_list = list(map(lambda x: x.image_url, request.image_list))
+    send_slack_message(f"이미지 데이터 정리 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 이 값을 모델에 입력하면 해당하는 클래스 id만 출력됨
    classes = get_classes(request.label_map, model.names)

    # 추론
+    start_time = time.time()
+    send_slack_message(f"추론 시작 (projectId: {project_id})...", status="success")
    results = run_predictions(model, url_list, request, classes)
+    send_slack_message(f"추론 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # 추론 결과 변환
-    response = [process_prediction_result(result, image, request.label_map) for result, image in zip(results,request.image_list)]
-    send_slack_message(f"predict 성공{response}", status="success")
+    start_time = time.time()
+    response = [process_prediction_result(result, image, request.label_map) for result, image in
+                zip(results, request.image_list)]
+    send_slack_message(f"추론 결과 변환 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")
+
+    send_slack_message(f"Detection predict 성공 (projectId: {project_id}) {response}", status="success")
+
    return response

 # 모델 로드
@ -109,17 +125,18 @@ def get_random_color():
@router.post("/train")
 async def detection_train(request: TrainRequest):

-    send_slack_message(f"train 요청{request}", status="success")
+    send_slack_message(f"Detection train 요청 projectId : {request.project_id}, 이미지 개수:{len(request.data)}", status="success")

    # 데이터셋 루트 경로 얻기 (프로젝트 id 기반)
    
    dataset_root_path = get_dataset_root_path(request.project_id)
    
    # 모델 로드
+    project_id = request.project_id
    start_time = time.time()
-    print("모델 로드")
-    model = get_model(request.project_id, request.m_key)
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
+    send_slack_message(f"모델 로드 중 (projectId: {project_id})...", status="success")
+    model = get_model(project_id, request.m_key)
+    send_slack_message(f"모델 로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # 이 값을 학습할때 넣으면 이 카테고리들이 학습됨
    names = list(request.label_map)
@ -131,48 +148,39 @@ async def detection_train(request: TrainRequest):
    
    # 데이터 전처리: 학습할 디렉토리 & 데이터셋 설정 파일을 생성
    start_time = time.time()
-    print("데이터 전처리 : 학습할 디렉토리 및 데이터셋 설정 파일 생성")
+    send_slack_message(f"데이터 전처리 시작: 학습 디렉토리 및 설정 파일 생성 중 (projectId: {project_id})...", status="success")
    process_directories(dataset_root_path, names)
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
+    send_slack_message(f"데이터 전처리 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 데이터 전처리: 데이터를 학습데이터와 검증데이터로 분류
    start_time = time.time()
-    print("데이터 전처리 : 데이터 분류")
+    send_slack_message(f"데이터 분류 중 (projectId: {project_id})...", status="success")
    train_data, val_data = split_data(request.data, request.ratio)
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
+    send_slack_message(f"데이터 분류 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 데이터 전처리: 데이터 이미지 및 레이블 다운로드
    start_time = time.time()
-    print("데이터 전처리 : 데이터 다운로드")
+    send_slack_message(f"데이터 다운로드 중 (projectId: {project_id})...", status="success")
    download_data(train_data, val_data, dataset_root_path, label_converter)
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
-    
-    # 학습
-    start_time = time.time()
-    print("학습 시작")
-    results = run_train(request, model,dataset_root_path)
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
+    send_slack_message(f"데이터 다운로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

-    # 학습 후 GPU 메모리 상태 확인
-    if torch.cuda.is_available():
-        allocated_memory = torch.cuda.memory_allocated() / (1024 ** 2)  # MB 단위
-        reserved_memory = torch.cuda.memory_reserved() / (1024 ** 2)  # MB 단위
-        print(f"현재 할당된 GPU 메모리: {allocated_memory:.2f} MB")
-        print(f"현재 예약된 GPU 메모리: {reserved_memory:.2f} MB")
-    else:
-        print("GPU 사용 불가능")
-    torch.cuda.empty_cache()
+    # 학습 시작
+    start_time = time.time()
+    send_slack_message(f"학습 시작 (projectId: {project_id})...", status="success")
+    results = run_train(request, model, dataset_root_path)
+    send_slack_message(f"학습 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # best 모델 저장
    start_time = time.time()
-    print("모델 저장")
-    model_key = save_model(project_id=request.project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
+    send_slack_message(f"모델 저장 중 (projectId: {project_id})...", status="success")
+    model_key = save_model(project_id=project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
+    send_slack_message(f"모델 저장 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

-    print("변환")
    result = results.results_dict
-    print(f'걸린 시간 {time.time() - start_time:.2f} 초')
-    
+
    response = TrainResponse(
        modelKey=model_key,
        precision= result["metrics/precision(B)"],
@ -182,9 +190,8 @@ async def detection_train(request: TrainRequest):
        accuracy=0,
        fitness= result["fitness"]
    )
-    send_slack_message(f"train 성공{response}", status="success")
+    send_slack_message(f"Detection train 성공 (projectId: {project_id}) {response}", status="success")

-    print(response)
    return response

 def split_data(data:list[TrainDataInfo], ratio:float):
--- a/ai/app/api/yolo/segmentation.py
+++ b/ai/app/api/yolo/segmentation.py
@ -1,3 +1,5 @@
+import time
+
 from fastapi import APIRouter, HTTPException
 from api.yolo.detection import get_classes, run_predictions, get_random_color, split_data, download_data
 from schemas.predict_request import PredictRequest
@ -15,24 +17,40 @@ router = APIRouter()

@router.post("/predict")
 async def segmentation_predict(request: PredictRequest):
-    send_slack_message(f"predict 요청: {request}", status="success")
+    project_id = request.project_id
+    send_slack_message(f"Segmentation predict 요청 (projectId: {project_id}, 이미지 개수: {len(request.image_list)})",
+                       status="success")

    # 모델 로드
-    model = get_model(request.project_id, request.m_key)
+    start_time = time.time()
+    send_slack_message(f"모델 로드 중 (projectId: {project_id})...", status="success")
+    model = get_model(project_id, request.m_key)
+    send_slack_message(f"모델 로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # 이미지 데이터 정리
-    url_list = list(map(lambda x:x.image_url, request.image_list))
+    start_time = time.time()
+    url_list = list(map(lambda x: x.image_url, request.image_list))
+    send_slack_message(f"이미지 데이터 정리 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 이 값을 모델에 입력하면 해당하는 클래스 id만 출력됨
    classes = get_classes(request.label_map, model.names)

    # 추론
+    start_time = time.time()
+    send_slack_message(f"Segmentation 추론 시작 (projectId: {project_id})...", status="success")
    results = run_predictions(model, url_list, request, classes)
+    send_slack_message(f"Segmentation 추론 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 추론 결과 변환
-    response = [process_prediction_result(result, image, request.label_map) for result, image in zip(results,request.image_list)]
-    send_slack_message(f"predict 성공{response}", status="success")
-    return response 
+    start_time = time.time()
+    response = [process_prediction_result(result, image, request.label_map) for result, image in
+                zip(results, request.image_list)]
+    send_slack_message(f"Segmentation predict 성공 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")
+
+    return response

 # 모델 로드
 def get_model(project_id:int, model_key:str):
@ -74,51 +92,74 @@ def process_prediction_result(result, image, label_map):

@router.post("/train")
 async def segmentation_train(request: TrainRequest):
+    project_id = request.project_id

-    send_slack_message(f"train 요청{request}", status="success")
+    send_slack_message(f"Segmentation train 요청 (projectId: {project_id} 이미지 개수: {len(request.data)})", status="success")

    # 데이터셋 루트 경로 얻기 (프로젝트 id 기반)
-    dataset_root_path = get_dataset_root_path(request.project_id)
+    dataset_root_path = get_dataset_root_path(project_id)

    # 모델 로드
-    model = get_model(request.project_id, request.m_key)
+    start_time = time.time()
+    send_slack_message(f"모델 로드 중 (projectId: {project_id})...", status="success")
+    model = get_model(project_id, request.m_key)
+    send_slack_message(f"모델 로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")

    # 이 값을 학습할때 넣으면 이 카테고리들이 학습됨
    names = list(request.label_map)

-    # 레이블 변환기 (file_util.py/create_segmentation_train_label() 에 쓰임)
-    label_converter = {request.label_map[key]:idx for idx, key in enumerate(request.label_map)}
-    # key : 데이터에 저장된 프로젝트 카테고리 id
-    # value : 모델에 저장될 카테고리 id (모델에는 key의 idx 순서대로 저장될 것임)
-    
-    # 데이터 전처리: 학습할 디렉토리 & 데이터셋 설정 파일을 생성
+    # 레이블 변환기
+    start_time = time.time()
+    label_converter = {request.label_map[key]: idx for idx, key in enumerate(request.label_map)}
+    send_slack_message(f"레이블 변환기 생성 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")
+
+    # 데이터 전처리: 학습할 디렉토리 및 설정 파일 생성
+    start_time = time.time()
+    send_slack_message(f"데이터 전처리 중 (projectId: {project_id})...", status="success")
    process_directories(dataset_root_path, names)
+    send_slack_message(f"데이터 전처리 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 데이터 전처리: 데이터를 학습데이터와 검증데이터로 분류
+    start_time = time.time()
    train_data, val_data = split_data(request.data, request.ratio)
+    send_slack_message(f"데이터 분류 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # 데이터 전처리: 데이터 이미지 및 레이블 다운로드
+    start_time = time.time()
+    send_slack_message(f"데이터 다운로드 중 (projectId: {project_id})...", status="success")
    download_data(train_data, val_data, dataset_root_path, label_converter)
+    send_slack_message(f"데이터 다운로드 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

-    # 학습
-    results = run_train(request, model,dataset_root_path)
+    # 학습 시작
+    start_time = time.time()
+    send_slack_message(f"Segmentation 학습 시작 (projectId: {project_id})...", status="success")
+    results = run_train(request, model, dataset_root_path)
+    send_slack_message(f"Segmentation 학습 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초",
+                       status="success")

    # best 모델 저장
-    model_key = save_model(project_id=request.project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
-    
+    start_time = time.time()
+    send_slack_message(f"모델 저장 중 (projectId: {project_id})...", status="success")
+    model_key = save_model(project_id=project_id, path=join_path(dataset_root_path, "result", "weights", "best.pt"))
+    send_slack_message(f"모델 저장 완료 (projectId: {project_id}). 걸린 시간: {time.time() - start_time:.2f} 초", status="success")
+
    result = results.results_dict

    response = TrainResponse(
        modelKey=model_key,
-        precision= result["metrics/precision(M)"],
-        recall= result["metrics/recall(M)"],
-        mAP50= result["metrics/mAP50(M)"],
-        mAP5095= result["metrics/mAP50-95(M)"],
-        accuracy = 0,
-        fitness= result["fitness"]
+        precision=result["metrics/precision(M)"],
+        recall=result["metrics/recall(M)"],
+        mAP50=result["metrics/mAP50(M)"],
+        mAP5095=result["metrics/mAP50-95(M)"],
+        accuracy=0,
+        fitness=result["fitness"]
    )
-    send_slack_message(f"train 성공{response}", status="success")
-            
+    send_slack_message(f"Segmentation train 성공 (projectId: {project_id}) {response}", status="success")
+
    return response
    
 def run_train(request, model, dataset_root_path):