Database 분석 (2)
Database를 활용한 헬스케어 데이터 분석
3일차: SQL 실무 + MySQL 실습
2일차 복습
GROUP BY/ HAVING / LIMIT / Aggression
INNER JOIN
LEFT JOIN
JOIN + WHERE
ALIAS 별칭 / DISTINCT
GROUP BY
HAVING
LIMIT
SELECT
FROM
WHERE
GROUP BY
HAVING
ORDER BY
LIMIT
FROM
WHERE
GROUP BY
HAVING
SELECT
ORDER BY
LIMIT
SELECT
patients.province_id
FROM
patients
INNER JOIN
province_names
ON
patients.province_id = province_names.province_id
WHERE
province_names.province_name LIKE '%a'
GROUP BY
province_names.province_name
ORDER BY
province_names.province_name desc
LIMIT 3;
SELECT*FROM patients
INNER JOIN province_names ON patients.province_id = province_names.province_id
GROUP BY provice_names.province_name
HAVING province_names.province_name LIKE '%a'
ORDER BY province_names.province_name DESC
LIMIT 3;
Aggregation 집계함수
COUNT()
SUM()
AVG()
MIN, MAX()
STDDEV()
SELECT
COUNT (patient_id), SUM(height), AVG(weight), MIN(height), MAX(weight)
FROM patients;
COUNT()
SUM()
AVG()
MIN(),MAX()
STDDEV()
SELECT
province_id,
AVG(height) AS avg_height
FROM
patients
GROUP BY
province_id
HAVING
AVG(height) >= 155
ORDER BY
avg_height DESC
LIMIT 3;
SELECT AVG(patients.height) FROM patients
INNER JOIN province_names ON patients.province_id=province_names.province_id
GROUP BY province_names.province_name
HAVING AVG(patients.height) >= 155
ORDER BY AVG(patients.height) DESC
LIMIT 3;
https://docs.google.com/document/d/14FPzr8tovZsMALHi64nMcowd4Wf4Um1zR20EFxjVnv8/edit?tab=t.0#heading=h.o2ylpaynwmk1
SQL 기본 작업
Create -> INSERT
Read -> SELECT
Update -> UPDATE
Delete -> DELETE
데이터베이스 생성 & 삭제
생성 및 사용
CREATE DATABASE db_namer
USE db_name;
DROP DATABASE db_name;
Workbench에는 여러 개의 DB가 있을 수 있음
USE로 작업 대상 DB를 선택해야 함
CREATE DATABASE mytest1;
/*
*/
--
--
CREATE TABLE table_name
column1 datatype constraints,
column2 datatype constraints,
);
DROP TABLE
create database mytest;
use mytest;
CREATE TABLE patient_info(
id INT auto_increment primary key,
name varchar(50) not null,
gender char(1),
birth_date date
city varchar(50) default 'Unknown'
);
DROP TABLE
CREATE TABLE labs (
lab_id INT AUTO_INCREMENT PRIMARY KEY,
patient_id INT,
lab_name VARCHAR(100),
result_value DECIMAL(10,4),
result_date DATE,
CONSTRAINT fk_labs_patient
FOREGIN KEY (patient_id) REFERENCES patients(patient_id)
DESCRIBE : 스키마 요약본 읽기
DESCRIBE table_name;
약자 DESC
SHOW
테이블 목록 확인
SHOW TABLES;
테이블 생성 SQL 전체 확인
SHOW CREATE TABLE table_name;
ALTER TABLE
DROP COLUMN
MODIFY
CREATE TABLE
DB Table 3개 생성
_______________________________________________________________________
create database mytest;
use mytest;
CREATE TABLE patient_info(
id INT auto_increment primary key,
name varchar(50) not null,
gender char(1),
birth_date date
city varchar(50) default 'Unknown'
);
_______________________________________________________________________
CREATE TABLE patients(
id INT auto_increment primary key,
name varchar(50) not null,
gender char(1),
birth_date date
);
CREATE TABLE diagnoses(
diag_id INT auto_increment primary key,
patient_id INT foreign key,
diagnosis_name varchar(50),
birth_date date
);
CREATE TABLE visits(
visit_id INT auto_increment primary key,
patient_id INT foreign key,
visit_date DATETIME,
doctor VARCHAR(50)
);
__________________________________________________________
-- 데이터베이스가 이미 존재할 경우 삭제하고 새로 생성합니다.
DROP DATABASE IF EXISTS mytest20251203;
-- 새 데이터베이스를 생성합니다.
CREATE DATABASE mytest20251203;
-- mytest20251203 데이터베이스를 사용합니다.
USE mytest20251203;
-- patients 테이블 생성
CREATE TABLE patients (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) NOT NULL,
gender CHAR(1),
birth_date DATE
);
-- diagnoses 테이블 생성
-- patient_id에 외래 키 제약 조건 추가
CREATE TABLE diagnoses (
diag_id INT AUTO_INCREMENT PRIMARY KEY,
patient_id INT,
diagnosis_name VARCHAR(50),
-- birth_date 컬럼은 diagnoses 테이블에서 불필요해 보여 제거했습니다.
FOREIGN KEY (patient_id) REFERENCES patients(id)
);
-- visits 테이블 생성
-- patient_id에 외래 키 제약 조건 추가
CREATE TABLE visits (
visit_id INT AUTO_INCREMENT PRIMARY KEY,
patient_id INT,
visit_date DATETIME,
doctor VARCHAR(50),
FOREIGN KEY (patient_id) REFERENCES patients(id)
);
일반적인 데이터베이스 설계에서는 birth_date와 같은 속성값보다는 환자의 고유 식별자인 patient_id (PK)를 외래 키로 사용하는 것이 훨씬 더 권장됩니다.
birth_date를 외래 키로 사용하면 같은 생년월일을 가진 두 명 이상의 환자를 patients 테이블에 입력할 수 없게 됩니다.
__________________________________________________________
insert into patient_info (name, gender, birth_date, city)
values('Emily Park','F','1990-07-21','Seoul');
UPDATE
UPDATE table_name
SET column1 = value 1, column2 = value2
WHERE 조건;
WHERE를 빼면 전체 데이터가 변경된다.
DELETE
DELETE FROM table_name
WHERE 조건;
실문에서는 DELETE 대신 soft delete 사용
특히 병원 시스템에서는 DELETE 거의 금지
__________________________________________________________
SELECT patients
__________________________________________________________
Unhandled exception: 'cp949' codec can't decode byte 0xeb
in position 3: illegal multibyte sequence
__________________________________________________________
create database mytest;
use mytest;
CREATE TABLE patient_info(
id INT auto_increment primary key,
name varchar(50) not null,
gender char(1),
birth_date date
city varchar(50) default 'Unknown'
);
CREATE TABLE patients(
id INT auto_increment primary key,
name varchar(50) not null,
gender char(1),
birth_date date
);
CREATE TABLE diagnoses(
diag_id INT auto_increment primary key,
patient_id INT foreign key,
diagnosis_name varchar(50),
birth_date date
);
CREATE TABLE visits(
visit_id INT auto_increment primary key,
patient_id INT foreign key,
visit_date DATETIME,
doctor VARCHAR(50)
);
__________________________________________________________
-- 데이터베이스가 이미 존재할 경우 삭제하고 새로 생성합니다.
DROP DATABASE IF EXISTS mytest20251203;
-- 새 데이터베이스를 생성합니다.
CREATE DATABASE mytest20251203;
-- mytest20251203 데이터베이스를 사용합니다.
USE mytest20251203;
-- patients 테이블 생성
CREATE TABLE patients (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) NOT NULL,
gender CHAR(1),
birth_date DATE
);
-- diagnoses 테이블 생성
-- patient_id에 외래 키 제약 조건 추가
CREATE TABLE diagnoses (
diag_id INT AUTO_INCREMENT PRIMARY KEY,
patient_id INT,
diagnosis_name VARCHAR(50),
-- birth_date 컬럼은 diagnoses 테이블에서 불필요해 보여 제거했습니다.
FOREIGN KEY (patient_id) REFERENCES patients(id)
);
-- visits 테이블 생성
-- patient_id에 외래 키 제약 조건 추가
CREATE TABLE visits (
visit_id INT AUTO_INCREMENT PRIMARY KEY,
patient_id INT,
visit_date DATETIME,
doctor VARCHAR(50),
FOREIGN KEY (patient_id) REFERENCES patients(id)
);
일반적인 데이터베이스 설계에서는 birth_date와 같은 속성값보다는 환자의 고유 식별자인 patient_id (PK)를 외래 키로 사용하는 것이 훨씬 더 권장됩니다.
birth_date를 외래 키로 사용하면 같은 생년월일을 가진 두 명 이상의 환자를 patients 테이블에 입력할 수 없게 됩니다.
__________________________________________________________
USE health_ai;
create table patients(
id INT AUTO_INCREMENT PRIMARY KEY,
name varchar(50) not null,
gender enum('M','F'),
birth_date date
);
create table diagnoses
diag_id
__________________________________________________________
USE mytest20251203;
ALTER TABLE patients ADD age INT;
ALTER TABLE visits CHANGE doctor hospital VARCHAR(100);
_________________________________________________________________
-- 환자 (patients) 테이블 데이터 삽입
INSERT INTO patients (id, name, age, gender, birth_date) VALUES
(1, 'Charlie', 65, 'M', '1960-07-07'),
(2, 'Baley', 55, 'M', '1970-04-04'),
(3, 'Alfredo', 32, 'M', '1993-03-08'),
(4, 'Janet', 81, 'F', '1944-04-02'),
(5, 'Thomas', 21, 'M', '2004-02-01'),
(6, 'George', 8, 'M', '2017-07-09'),
(7, 'Louis', 70, 'M', '1955-05-11'),
(8, 'Karen', 57, 'F', '1968-06-06'),
(9, 'Morris', 66, 'F', '1959-05-18'),
(10, 'Yamin', 43, 'M', '1982-08-18');
-- 진단 (diagnosis) 테이블 데이터 삽입
-- diagnosis_id 컬럼은 이미지를 기반으로 임의의 ID를 사용했습니다.
INSERT INTO diagnosis (diagnosis_id, patient_id, diagnosis_name, diag_date) VALUES
(480, 1, 'Hypertension', '2016-07-08'),
(280, 2, 'Diabetes', '2012-04-18'),
(880, 3, 'Concussion', '2015-11-19'),
(90, 4, 'Fracture', '2014-04-14'),
(777, 5, 'Heartburn', '2025-04-01'),
(444, 6, 'Eczema', '2013-03-13'),
(987, 7, 'HeartFailure', '2016-06-06'),
(909, 8, 'Cataract', '2018-08-18'),
(432, 9, 'Glaucoma', '2012-02-12'),
(1208, 10, 'KidneyStone', '2017-07-07');
-- 방문 (visits) 테이블 데이터 삽입
-- visit_id 컬럼은 문자열 형식이므로 VARCHAR 타입이 필요합니다.
INSERT INTO visits (visit_id, patient_id, visit_date, hospital) VALUES
('00001-1', 1, '2016-07-08', 'Seoul Clinic'),
('00001-2', 2, '2012-04-18', 'Mayo Hospital'),
('00001-3', 3, '2015-11-19', 'Springfield Clinic'),
('00001-4', 4, '2014-04-14', 'Springfield Clinic'),
('00001-5', 5, '2025-04-01', 'Seoul Clinic'),
('00001-6', 6, '2013-03-13', 'Mayo Hospital'),
('00001-7', 7, '2016-06-06', 'Springfield Clinic'),
('00001-8', 8, '2018-08-18', 'Seoul Clinic'),
('00001-9', 9, '2012-02-12', 'Springfield Clinic'),
('00001-10', 10, '2017-07-07', 'Mayo Hospital');
댓글
댓글 쓰기