AI 헬스케어 2기 데이터 분석 코스 Python (2)

첫날 실습 과제에 대해서 설명해주셨다.

반올림

round()

소수점 첫째자리까지만 보여주세요

.1f

소수점 둘째자리까지만 보여주세요.

.2f

#feet으로 변환

height_feet = height_cm / 30.48

# 원더독스

wonder_dogs1 = "인쿠시"

wonder_dogs2 = "표승주"

wonder_dogs3 = "백채림"

def say_hello():

print("안녕하세요")

say_hello()

def say_hello():

print("안녕하세요")

say_hello()

def say_hello():

print("안녕하세요")

say_hello()

wonder_dogs4 = "이나현"

wonder_dogs5 = "한송희"

위와 같이 데이터마다 변수를 하나씩 따로 만들면, 변수들 사이에 다른 코드가 끼어들면서 관련된 데이터가 흩어지기 때문에 유지 보수가 어려워진다.

자료구조

많은 양의 데이터를 체계적으로 다루는 방법

여러 데이터를 효율적으로 관리하는 4가지 주요 자료구조: 리스트(list), 튜플(tuple), 딕셔너리(dict), 집합(set)

# 원더독스

wonder_dogs1 = "인쿠시"

wonder_dogs2 = "표승주"

wonder_dogs3 = "백채림"

wonder_dogs4 = "이나현"

wonder_dogs5 = "한송희"

wonder_dogs = ["인쿠시", "표승주", "백채림", "이나현", "한송희"]

쉼표(,)를 기준으로 요소가 구분되므로, 쉼표로 나뉜 항목의 수가 곧 데이터의 개수가 된다.

리스트(list): 대괄호 [] 안에 여러 데이터를 순서대로 담는 자료구조. 리스트에 담긴 각각의 데이터를 요소(element)라고 한다.

인덱싱과 슬라이싱

인덱싱

특정 위치의 데이터 가져오기

#처방약 목록

medications = ["아스피린", "메트포르민", "리시노프릴", "아토르바스타틴", "오메프라졸"]

medications[2]

'리시노프릴'

파이썬의 인덱스(index)는 0부터 시작한다. 따라서 medications[2]는 리스트의 세 번째 요소를 가리킨다.

뒤에서부터 셀 수도 있다.

medications[-2]

'아토르바스타틴'

슬라이싱

[시작:끝]: 시작 인덱스부터 끝-1 인덱스까지

[:끝]: 처음부터 끝-1 인덱스까지

[시작:]: 시작부터 끝까지

[:]: 전체

[시작:끝:간격]: 시작부터 끝-1 인덱스까지 간격만큼 건너뛰기

[::간격]: 처음부터 끝까지 간격만큼 건너뛰기

medications[0:3]

['아스피린', '메트포르민', '리시노프릴']

medications[:3]

['아스피린', '메트포르민', '리시노프릴']

medications[-2:]

['아토르바스타틴', '오메프라졸']

# 짝수 인덱스만 (0, 2, 4번째)

medications[::2]

['아스피린', '리시노프릴', '오메프라졸']

# 전체 역순

medications[::-1]

['오메프라졸', '아토르바스타틴', '리시노프릴', '메트포르민', '아스피린']

# 문자열 슬라이싱

patient_name = "김철수"

print(patient_name[0:2])

print(patient_name[1:3])

print(patient_name[1:])

print(patient_name[:1])

리스트 조작하기

append() 리스트 끝에 1개 요소 추가

insert(위치, 값) 원하는 위치에 1개 요소 추가

extend() 리스트 끝에 여러 요소 한번에 추가

append()

# append: 1개 데이터를 맨 끝에 추가

blood_pressures = [120, 125, 130, 118] # 기존 혈압 측정값

blood_pressures.append(135)

print(f"새 측정 후: {blood_pressures}")

새 측정 후: [120, 125, 130, 118, 135]

# insert: 1개 데이터를 원하는 위치에 추가

blood_pressures.insert(1, 122) # 인덱스 1 번째 위치에 추가

print(f"누락값 추가 후: {blood_pressures}")

누락값 추가 후: [120, 122, 125, 130, 118, 135]

# extend: 여러 데이터를 맨 끝에 추가

evening_readings = [128, 122, 119]

blood_pressures.extend(evening_readings)

print(f"저녁 측정값 추가 후: {blood_pressures}")

저녁 측정값 추가 후: [120, 122, 125, 130, 118, 135, 128, 122, 119]

# extend: 여러 데이터를 맨 끝에 추가

evening_readings = [128, 122, 119]

blood_pressures.extend(evening_readings)

print(f"저녁 측정값 추가 후: {blood_pressures}")

# append vs extend 차이점

test_readings = [140, 145]

blood_pressures.append(test_readings) # 리스트가 하나의 요소로 추가됨

print(f"append로 추가: {blood_pressures}")

append로 추가: [120, 122, 125, 130, 118, 135, 128, 122, 119, [140, 145]]

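remove(값) 값으로 삭제 (첫 번째로 일치하는 요소만 삭제)

pop(인덱스) 해당 인덱스의 요소를 삭제하고 그 값을 반환 (인덱스를 생략하면 마지막 요소)

clear() 리스트의 모든 요소 삭제

del 특정 인덱스·구간의 요소 또는 변수 자체를 삭제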

# 처방약 목록 관리

medications = ["아스피린", "메트포르민", "임시처방약", "리시노프릴", "아토르바스타틴"]

medications

['아스피린', '메트포르민', '임시처방약', '리시노프릴', '아토르바스타틴']

# remove: 값으로 삭제 (첫 번째 일치하는 것만 삭제)

medications.remove("임시처방약")

print(f"처방 제거 후: {medications}")

처방 제거 후: ['아스피린', '메트포르민', '리시노프릴', '아토르바스타틴']

# pop: 특정 인덱스로 삭제하고 삭제된 값 반환

removed_med = medications.pop(1) # 인덱스 1번째 약물 삭제

print(f"삭제된 약물: {removed_med}")

print(f"처방 제거 후: {medications}")

삭제된 약물: 메트포르민

처방 제거 후: ['아스피린', '리시노프릴', '아토르바스타틴']

# 마지막 요소 삭제

last_med = medications.pop()

print(f"삭제된 약물: {last_med}")

print(f"처방 제거 후: {medications}")

삭제된 약물: 아토르바스타틴

처방 제거 후: ['아스피린', '리시노프릴']

# clear: 리스트의 모든 요소 삭제 (모든 처방 중단)

medications.clear()

print(f"처방 제거 후: {medications}")

처방 제거 후: []

# del: 변수 자체나 특정 인덱스 삭제

blood_pressures = [120, 125, 130, 118, 135, 128, 122] # 혈압 측정값

del blood_pressures[0] # 첫 번째 측정값 삭제

print(f"첫 측정값 제외: {blood_pressures}")

del blood_pressures[:2] # 처음 2개 측정값 삭제

print(f"초기 측정값들 제외: {blood_pressures}")

첫 측정값 제외: [125, 130, 118, 135, 128, 122]

초기 측정값들 제외: [118, 135, 128, 122]

del blood_pressures # 변수 삭제

blood_pressures

NameError Traceback (most recent call last)

/tmp/ipython-input-3289696385.py in <cell line: 0>()

----> 1 del blood_pressures # 변수 삭제

2 blood_pressures

NameError: name 'blood_pressures' is not defined

정보를 확인하는 메소드

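len() 리스트의 길이(요소 개수) 반환

index(값) 해당 값이 처음 나타나는 인덱스 반환

count(값) 해당 값이 들어 있는 개수 반환

in 값 존재 여부 확인 (결과는 Boolean: True / False)

in 연산자

작은 것(찾는 값) in 큰 것(리스트 같은 컨테이너) — 예: '김철수' in patient_names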

patient_count = len(patient_names)

print(f"총 환자 수: {patient_count}")

# 환자 이름 목록

patient_names = ['김철수', '이영희', '박민수', '이영희', '최지연']

patient_names

['김철수', '이영희', '박민수', '이영희', '최지연']

# len(): 리스트 길이 반환

patient_count = len(patient_names)

print(f"총 환자 수: {patient_count}")

총 환자 수: 5

리스트는 같은 값이 중복되어 있어도 위치(인덱스)가 다르면 서로 다른 요소로 취급한다. 그래서 '이영희'가 두 번 들어 있어도 길이는 5이다.

# index(): 특정 값의 index위치 반환 (첫 번째로 나타나는 위치)

lee_position = patient_names.index('이영희')

print(f"이영희 환자 위치: {lee_position}")

이영희 환자 위치: 1

# count(): 특정 값의 개수 반환

lee_count = patient_names.count('이영희')

print(f"이영희 동명이인 수: {lee_count}")

이영희 동명이인 수: 2

# in 연산자: 값 존재 여부 확인 (True/False)

print(f"홍길동 환자 등록: {'홍길동' in patient_names}")

print(f"김철수 환자 등록: {'김철수' in patient_names}")

홍길동 환자 등록: False

김철수 환자 등록: True

# 환자별 체중 데이터 분석

patient_weights = [65.5, 78.2, 52.1, 89.7, 71.3]

print(f"원본 체중 데이터: {patient_weights}")

원본 체중 데이터: [65.5, 78.2, 52.1, 89.7, 71.3]

# sorted(): 새로운 정렬된 리스트 반환 (원본 유지)

sorted_weights = sorted(patient_weights)

print(f"원본 유지: {patient_weights}")

print(f"정렬된 체중: {sorted_weights}")

원본 유지: [65.5, 78.2, 52.1, 89.7, 71.3]

정렬된 체중: [52.1, 65.5, 71.3, 78.2, 89.7]

# sort(): 원본 리스트 자체를 정렬

patient_weights.sort()

print(f"원본 정렬됨: {patient_weights}")

원본 정렬됨: [52.1, 65.5, 71.3, 78.2, 89.7]

int(True)

int(False)

test = [65.5, 78.2, 52.1, True, "김철수", 89.7, 71.3]

65.5>"김철수"

result = sorted(test)

---------------------------------------------------------------------------

TypeError Traceback (most recent call last)

/tmp/ipython-input-2357090776.py in <cell line: 0>()

----> 5 65.5>"김철수"

6 result = sorted(test)

TypeError: '>' not supported between instances of 'float' and 'str'

test = [65.5, 78.2, 52.1, True, 89.7, 71.3]

print(f"{test}")

test = [65.5, 78.2, 52.1, True, 89.7, 71.3]

print(f"{test}")

# reverse(): 리스트 순서를 뒤집기

patient_names = ["김철수", "이영희", "박민수", "최지연"]

patient_names.reverse()

print(f"뒤집어서 정렬: {patient_names}")

뒤집어서 정렬: ['최지연', '박민수', '이영희', '김철수']

# 특별한 정렬 방법들

departments = ['소아과', '내과', '산부인과', '외과', '정형외과'] # 진료과 목록

by_length = sorted(departments, key=len) # 길이순 정렬

print(f"길이순: {by_length}")

# 가나다순 정렬

sorted_departments = sorted(departments)

print(f"가나다순: {sorted_departments}")

# 역순 정렬 (ㅎ부터 ㄱ까지)

reverse_departments = sorted(departments, reverse=True)

print(f"내림차순: {reverse_departments}")

# 특별한 정렬 방법들

departments = ['소아과', '내과', '산부인과', '외과', '정형외과'] # 진료과 목록

by_length = sorted(departments, key=len) # 길이순 정렬

print(f"길이순: {by_length}")

# 가나다순 정렬

sorted_departments = sorted(departments)

print(f"가나다순: {sorted_departments}")

# 역순 정렬 (ㅎ부터 ㄱ까지)

reverse_departments = sorted(departments, reverse=True)

print(f"내림차순: {reverse_departments}")

길이순: ['내과', '외과', '소아과', '산부인과', '정형외과']

가나다순: ['내과', '산부인과', '소아과', '외과', '정형외과']

내림차순: ['정형외과', '외과', '소아과', '산부인과', '내과']

변환 메소드

리스트를 다른 형태로 변환하는 메소드입니다.

# 진료과 목록을 문자열로 변환

departments = ["내과", "외과", "소아과", "산부인과"]

# join(): 리스트를 문자열로 합치기

result1 = ", ".join(departments)

print(f"진료과 목록: {result1}")

result2 = " | ".join(departments)

print(f"진료과 구분: {result2}")

result3 = "".join(departments)

print(f"진료과 연결: {result3}")

진료과 목록: 내과, 외과, 소아과, 산부인과

진료과 구분: 내과 | 외과 | 소아과 | 산부인과

진료과 연결: 내과외과소아과산부인과

# 숫자 리스트를 문자열로 합치기 (문자열로 변환 후 join)

# 혈압 수치를 문자열로 변환

systolic_pressures = [120, 125, 130, 135]

bp_string = ", ".join(map(str, systolic_pressures))

print(f"수축기 혈압 기록: {bp_string}")

수축기 혈압 기록: 120, 125, 130, 135

리스트 연산

medications.remove("임시약")

print(f"임시약 제거 후: {medications}")

튜플의 불변성(Immutable)

불변성이란?

불변성(immutable)이란 객체가 생성된 후 그 내용을 변경할 수 없는 성질을 말합니다.

# 가변(mutable) vs 불변(immutable) 비교

patient_list = ["김환자", "65세"] # 리스트: 가변

patient_tuple = ("김환자", "65세") # 튜플: 불변

# 리스트는 내용 변경 가능

patient_list[1] = "66세"

print(f"리스트 변경 후: {patient_list}")

# 튜플은 내용 변경 불가능

patient_tuple[1] = "66세" # 에러 발생!

튜플은 **불변성(immutable)**을 가진 자료구조입니다. 즉, 한 번 생성된 후에는 내용을 변경할 수 없습니다.

왜 불변성이 중요한가?

데이터 안전성: 실수로 중요한 정보(환자 ID, 생년월일 등)가 변경되는 것을 방지

해시 가능: 딕셔너리의 키로 사용 가능

메모리 효율성: 같은 내용의 튜플은 메모리를 공유할 수 있음

# 의료 정보에서 불변성 활용 예시

HOSPITAL_LOCATION = (37.5665, 126.9780) # 병원 위치 좌표 (변경되면 안 됨)

EMERGENCY_CODE = ("119", "응급실", "24시간") # 응급 연락처 (변경되면 안 됨)

print(f"병원 위치: {HOSPITAL_LOCATION}")

print(f"응급 연락처: {EMERGENCY_CODE}")

튜플 사용하기

numbers = (1, 2, 3, 4, 5)

type(numbers)

tuple

# 괄호를 씌우지 않아도 쉼표(,)로 값을 나열하면 튜플이 된다

blood_types = "A", "B", "O", "AB"

print(f"혈액형: {blood_types}")

print(f"데이터 타입: {type(blood_types)}")

혈액형: ('A', 'B', 'O', 'AB')

데이터 타입: <class 'tuple'>

# 데이터 읽기

print(f"첫 번째 혈액형: {blood_types[0]}")

print(f"마지막 혈액형: {blood_types[-1]}")

첫 번째 혈액형: A

마지막 혈액형: AB

patient_infos = ("김환자", 65, "고혈압")

# 튜플 언패킹

name, age, diagnosis = patient_infos

print(f"{name}은 {age}살이고 {diagnosis} 진단을 받았습니다")

김환자은 65살이고 고혈압 진단을 받았습니다

# 튜플은 변경 불가능

patient_infos[0] = "박환자"

---------------------------------------------------------------------------

TypeError Traceback (most recent call last)

/tmp/ipython-input-2971948180.py in <cell line: 0>()

1 # 튜플은 변경 불가능

----> 2 patient_infos[0] = "박환자"

TypeError: 'tuple' object does not support item assignment

딕셔너리

딕셔너리는 키(key)와 값(value)의 쌍으로 데이터를 저장하는 자료구조입니다.

마치 사전에서 단어(키)를 찾으면 뜻(값)이 나오는 것과 같습니다.

{

"파이썬":"프로그래밍 언어"

}

# 환자 정보 딕셔너리 만들기

patient = {

"name": "김환자", # str

"age": 65, # int

"diagnosis": "고혈압", # str

"medications": ["아스피린", "메트포르민"] # list

}

값(value): 김환자, 65, 고혈압, ...

키(key): name, age, diagnosis, ...

patient["name"]

'김환자'

patient["age"]

patient["phone"] # 없는 키값을 지정하면 키 에러

---------------------------------------------------------------------------

KeyError Traceback (most recent call last)

/tmp/ipython-input-3631803009.py in <cell line: 0>()

----> 1 patient["phone"] # 없는 키값을 지정하면 키 에러

KeyError: 'phone'

# 초기 환자 정보

patient = {

"name": "김환자",

"age": 65,

"diagnosis": "고혈압"

}

patient

{'name': '김환자', 'age': 65, 'diagnosis': '고혈압'}

# 새 정보 추가

patient["phone"] = "010-1234-5678"

patient["blood_type"] = "A형"

patient["doctor"] = "이의사"

# 정보 추가 후

patient

{'name': '김환자',

'age': 65,

'diagnosis': '고혈압',

'phone': '010-1234-5678',

'blood_type': 'A형',

'doctor': '이의사'}

# 정보 수정

patient["age"] = 66 # 나이 증가

patient["diagnosis"] = "고혈압, 당뇨" # 추가 진단

patient

{'name': '김환자',

'age': 66,

'diagnosis': '고혈압, 당뇨',

'phone': '010-1234-5678',

'blood_type': 'A형',

'doctor': '이의사'}

# pop(): 특정 키 삭제하고 값 반환

removed_phone = patient.pop("phone")

print(f"삭제된 전화번호: {removed_phone}")

print("전화번호 삭제 후:", patient)

삭제된 전화번호: 010-1234-5678

전화번호 삭제 후: {'name': '김환자', 'age': 65, 'diagnosis': '고혈압', 'blood_type': 'A형'}

# pop() 기본값 설정 (키가 없을 때)

email = patient.pop("email", "이메일 정보 없음")

print(f"이메일: {email}")

print("현재 환자 정보:", patient)

이메일: 이메일 정보 없음

현재 환자 정보: {'name': '김환자', 'age': 65, 'diagnosis': '고혈압', 'blood_type': 'A형'}

# popitem(): 마지막 키-값 쌍 삭제

last_item = patient.popitem() # 키-값 튜플로 반환

print(f"마지막 삭제 항목: {last_item}")

print("마지막 항목 삭제 후:", patient)

마지막 삭제 항목: ('blood_type', 'A형')

마지막 항목 삭제 후: {'name': '김환자', 'age': 65, 'diagnosis': '고혈압'}

# 안전한 방법 (키가 없을경우 None 반환)

mri_cost = medical_costs.get("MRI비용")

print("MRI 비용:", mri_cost)

MRI 비용: None

# 안전한 접근을 위한 조건문

if "수술비" in medical_costs:

print(f"수술비: {medical_costs['수술비']}원")

else:

print("수술비 정보가 없습니다")

수술비 정보가 없습니다

# 명시적으로 작성

print("진료비 항목 있음:", "진료비" in medical_costs.keys())

print("수술비 항목 있음:", "수술비" in medical_costs.keys())

진료비 항목 있음: True

수술비 항목 있음: False

집합 - 중복 없는 데이터

집합(set)은 중복을 허용하지 않으며

순서가 없는 자료구조입니다.

수학의 집합 개념과 동일합니다.

# 집합 만들기 (중복 자동 제거)

allergy_drugs_set = {"페니실린", "아스피린", "페니실린", "이부프로펜", "아스피린"}

print(f'알레르기 약물 집합: {allergy_drugs_set}')

print(f'고유 약물 개수: {len(allergy_drugs_set)}')

알레르기 약물 집합: {'이부프로펜', '아스피린', '페니실린'}

고유 약물 개수: 3

# 리스트를 집합으로 변환 (중복 제거)

symptoms_list = ["발열", "기침", "발열", "두통", "기침", "오한"] # 중복된 증상들

symptoms_set = set(symptoms_list)

print(f'고유 증상들: {symptoms_set}') # 순서가 없기 때문에 출력 순서가 다를 수 있음

고유 증상들: {'기침', '두통', '오한', '발열'}

자료구조 변환과 내장 함수

내장 함수 활용

len() - 길이/개수 확인

sum() - 합계 계산

max(), min() - 최댓값, 최솟값

all(), any() - 논리 검사

AI Health Care Camp at OZ Coding School

AI 헬스케어 2기 데이터 분석 코스 Python (2)

댓글

댓글 쓰기

이 블로그의 인기 게시물

베이스 캠프에서 (1)

베이스 캠프에서 (2)

Database 분석 (4)