Fix: Detection, Segmentation 학습 레이블 컨버터 추가

This commit is contained in:
김진현 2024-09-27 12:58:18 +09:00
parent 46c971494e
commit 02929ebc86
3 changed files with 24 additions and 18 deletions

View File

@ -115,6 +115,11 @@ async def detection_train(request: TrainRequest):
# 이 값을 학습할때 넣으면 이 카테고리들이 학습됨
names = list(request.label_map)
# 레이블 변환기 (file_utils.py/create_detection_train_label() 에 쓰임)
label_converter = {request.label_map[key]:idx for idx, key in enumerate(request.label_map)}
# key : 데이터에 저장된 프로젝트 카테고리 id
# value : 모델에 저장될 카테고리 id (모델에는 key의 idx 순서대로 저장될 것임)
# 데이터 전처리: 학습할 디렉토리 & 데이터셋 설정 파일을 생성
process_directories(dataset_root_path, names)
@ -123,7 +128,7 @@ async def detection_train(request: TrainRequest):
train_data, val_data = split_data(request.data, request.ratio)
# 데이터 전처리: 데이터 이미지 및 레이블 다운로드
download_data(train_data, val_data, dataset_root_path)
download_data(train_data, val_data, dataset_root_path, label_converter)
# 학습
results = run_train(request, model,dataset_root_path)
@ -155,13 +160,13 @@ def split_data(data:list[TrainDataInfo], ratio:float):
except Exception as e:
raise HTTPException(status_code=500, detail="exception in split_data(): " + str(e))
def download_data(train_data:list[TrainDataInfo], val_data:list[TrainDataInfo], dataset_root_path:str, label_converter:dict[int, int]):
    """Download images and create training label files for both dataset splits.

    Every entry of train_data goes under the "train" child directory and every
    entry of val_data under "val"; label_converter remaps project category ids
    to the model's category ids when the label files are written.

    Raises:
        HTTPException: 500 with the underlying error message if any item fails.
    """
    try:
        # Process both splits with one loop instead of two parallel ones.
        for split_name, split_items in (("train", train_data), ("val", val_data)):
            for item in split_items:
                process_image_and_label(item, dataset_root_path, split_name, label_converter)
    except Exception as e:
        raise HTTPException(status_code=500, detail="exception in download_data(): " + str(e))

View File

@ -7,10 +7,9 @@ from schemas.train_report_data import ReportData
from schemas.train_response import TrainResponse
from services.load_model import load_segmentation_model
from services.create_model import save_model
from utils.file_utils import get_dataset_root_path, process_directories, process_image_and_label, join_path
from utils.file_utils import get_dataset_root_path, process_directories, join_path
from utils.slackMessage import send_slack_message
from utils.api_utils import send_data_call_api
import random
router = APIRouter()
@ -86,6 +85,11 @@ async def segmentation_train(request: TrainRequest):
# 이 값을 학습할때 넣으면 이 카테고리들이 학습됨
names = list(request.label_map)
# 레이블 변환기 (file_utils.py/create_segmentation_train_label() 에 쓰임)
label_converter = {request.label_map[key]:idx for idx, key in enumerate(request.label_map)}
# key : 데이터에 저장된 프로젝트 카테고리 id
# value : 모델에 저장될 카테고리 id (모델에는 key의 idx 순서대로 저장될 것임)
# 데이터 전처리: 학습할 디렉토리 & 데이터셋 설정 파일을 생성
process_directories(dataset_root_path, names)
@ -94,7 +98,7 @@ async def segmentation_train(request: TrainRequest):
train_data, val_data = split_data(request.data, request.ratio)
# 데이터 전처리: 데이터 이미지 및 레이블 다운로드
download_data(train_data, val_data, dataset_root_path)
download_data(train_data, val_data, dataset_root_path, label_converter)
# 학습
results = run_train(request, model,dataset_root_path)

View File

@ -39,7 +39,7 @@ def process_directories(dataset_root_path:str, model_categories:list[str]):
shutil.rmtree(os.path.join(dataset_root_path, "result"))
make_yml(dataset_root_path, model_categories)
def process_image_and_label(data:TrainDataInfo, dataset_root_path:str, child_path:str):
def process_image_and_label(data:TrainDataInfo, dataset_root_path:str, child_path:str, label_converter:dict[int,int]):
"""이미지 저장 및 레이블 파일 생성"""
# 이미지 url로부터 파일명 분리
img_name = data.image_url.split('/')[-1]
@ -60,11 +60,11 @@ def process_image_and_label(data:TrainDataInfo, dataset_root_path:str, child_pat
# 레이블 -> 학습용 레이블 데이터 파싱 후 생성
if label['task_type'] == "det":
create_detection_train_label(label, label_path)
create_detection_train_label(label, label_path, label_converter)
elif label["task_type"] == "seg":
create_segmentation_train_label(label, label_path)
create_segmentation_train_label(label, label_path, label_converter)
def create_detection_train_label(label:dict, label_path:str):
def create_detection_train_label(label:dict, label_path:str, label_converter:dict[int, int]):
with open(label_path, "w") as train_label_txt:
for shape in label["shapes"]:
train_label = []
@ -72,18 +72,18 @@ def create_detection_train_label(label:dict, label_path:str):
y1 = shape["points"][0][1]
x2 = shape["points"][1][0]
y2 = shape["points"][1][1]
train_label.append(str(shape["group_id"])) # label Id
train_label.append(str(label_converter[shape["group_id"]])) # label Id
train_label.append(str((x1 + x2) / 2 / label["imageWidth"])) # 중심 x 좌표
train_label.append(str((y1 + y2) / 2 / label["imageHeight"])) # 중심 y 좌표
train_label.append(str((x2 - x1) / label["imageWidth"])) # 너비
train_label.append(str((y2 - y1) / label["imageHeight"] )) # 높이
train_label_txt.write(" ".join(train_label)+"\n")
def create_segmentation_train_label(label:dict, label_path:str):
def create_segmentation_train_label(label:dict, label_path:str, label_converter:dict[int, int]):
with open(label_path, "w") as train_label_txt:
for shape in label["shapes"]:
train_label = []
train_label.append(str(shape["group_id"])) # label Id
train_label.append(str(label_converter[shape["group_id"]])) # label Id
for x, y in shape["points"]:
train_label.append(str(x / label["imageWidth"]))
train_label.append(str(y / label["imageHeight"]))
@ -140,7 +140,4 @@ def process_image_and_label_in_cls(data:TrainDataInfo, dataset_root_path:str, ch
label_path = os.path.join(dataset_root_path,child_path,label_name)
# url로부터 이미지 다운로드
urllib.request.urlretrieve(data.image_url, os.path.join(label_path, img_name))
def download_image(url, path):
urllib.request.urlretrieve(url, path)
urllib.request.urlretrieve(data.image_url, os.path.join(label_path, img_name))