一、常見檔案類型與存取方式
01. Text
# Read Text
with open("file.txt") as file:
data = file.read()
# Reuse data: a second file.read() returns "" because the cursor is already at the end of the file
data_in_line = data.splitlines()
# Rewrite whole file
with open("my_file.txt", mode='w') as file:
content = file.write("new text inside")
# Append content after the original
with open("my_file.txt", mode='a') as file:
content = file.write("new text inside")
02. JSON
Write
json.dump(input_data, output_file)
Read
json.load(input_file)
Update
# update() is a dict method: call it on the dict returned by json.load(), not on the json module
data.update(append_data)
new_data = {
website: {
"email": email,
"password": password,
}
}
try:
with open("data.json", "r") as file:
# Reading old data
data = json.load(file)
except FileNotFoundError:
with open("data.json", "w") as file:
json.dump(new_data, file, indent=4)
else:
# Updating old data with new data
data.update(new_data)
with open("data.json", "w") as file:
# Saving updated data
json.dump(data, file, indent=4)
03. CSV
# CSV => DataFrame
data = pandas.read_csv("data/word_to_learn.csv")
# DataFrame => dictionary in list
to_learn = data.to_dict("records")
# list => DataFrame
to_learn_dataframe = pandas.DataFrame(to_learn)
# DataFrame => CSV
to_learn_dataframe.to_csv("data/word_to_learn.csv", index=False)
二、常見資料型態與建立
01. List
new_list = [new_item for item in list]
new_list = [new_item for item in list if test]
Create from Series
temp_list = data["temp"].to_list()
to_learn.remove(current_card)
02. Dictionary
new_dict = {new_key:new_value for item in list}
new_dict = {new_key:new_value for (key,value) in dict.items()}
new_dict = {new_key:new_value for (key,value) in dict.items() if test}
Create from Data Frame
data_dict = data.to_dict()
03. Data Frame
Create from Dictionary
student_data_frame = pandas.DataFrame(student_score)
student_score = {
"students": ["Amy", "James", "Penny"],
"scores": [76, 56, 65]
}
Create from CSV
student_data_frame = pandas.read_csv("student_score.csv")
student score
0 Amy 56
1 James 76
2 Lily 98
04. Series
Create from Data Frame
student_score = data["score"]
student_score = data.score
Calculate data in Series
# 求 student_score 的最大值
highest_score = data["score"].max()
# 求 student_score 的平均值
average_score = data.score.mean()
# 求 day = "Monday" 時的攝氏氣溫並轉成華氏氣溫
monday = data[data.day == "Monday"]
# 先用 .iloc[0] 取出單一數值;直接對 Series 呼叫 int() 在新版 pandas 已被棄用
f_temp = int(monday.c_temp.iloc[0]) * 9 / 5 + 32
# 求Monday的資料
monday_info = data[data.day == "Monday"]
# 求最高溫的日期
highest_temp_day = data[data.temp == data.temp.max()]
三、Data Frame 內部資料取得
# Dictionary
student_dict = {
"student": ["Amy", "James", "Lily"],
"score": [56, 76, 98]
}
# Data Frame
student score
0 Amy 56
1 James 76
2 Lily 98
01. Dictionary.items()
for(key, value) in student_dict.items():
print(key)
student
score
for(key, value) in student_dict.items():
print(value)
['Amy', 'James', 'Lily']
[56, 76, 98]
02. Data Frame.items() => Python內建語法
for(key, value) in student_data_frame.items():
print(key)
student
score
for(key, value) in student_data_frame.items():
print(value)
0 Amy
1 James
2 Lily
Name: student, dtype: object
0 56
1 76
2 98
Name: score, dtype: int64
03. Data Frame.iterrows() => Pandas改良語法
index (直行資料),印出表格第一直行之索引
for(index, row) in student_data_frame.iterrows():
print(index)
0
1
2
row (橫列資料),印出表格每一橫列之內容(且保留標題)
for(index, row) in student_data_frame.iterrows():
print(row)
student Amy
score 56
Name: 0, dtype: object
student James
score 76
Name: 1, dtype: object
student Lily
score 98
Name: 2, dtype: object
04. 特定條件下資料取得
# 求學生名單
for(index, row) in student_data_frame.iterrows():
print(row.student)
# 也可用Series取得
student = student_data_frame.student
# output
> Amy
> James
> Lily
# 求Amy的成績
for(index, row) in student_data_frame.iterrows():
if row.student == 'Amy':
print(row.score)
# 也可用Series取得
amy_info = student_data_frame[student_data_frame.student == "Amy"]
amy_score = amy_info.score
# output
> 56
# 求成績最高學生
highest_score_student = student_data_frame.student[student_data_frame.score == student_data_frame.score.max()]