import pandas as pd
import numpy as np
# Sample records for the demo. Several columns hold a mix of real values and
# missing-value markers (None, np.nan, pd.NaT, '' and ' '); the trailing notes
# are the original author's expectations of how each marker is displayed.
contents = {
    "name": ['Bob', 'LiSa', 'Mary', 'Alan'],
    "ID": [1, 2, ' ', None],  # author's note: shows as NaN (TODO confirm for an object column)
    "age": [np.nan, 28, 38, ''],
    "age02": [14, 26, 24, 6],
    "born": [
        pd.NaT,
        pd.Timestamp("1990-01-01"),
        pd.Timestamp("1980-01-01"),
        '',
    ],  # author's note: shows as NaT
    "sex": ['男', '女', '女', None],  # author's note: shows as None
    "hobbey": ['打籃球', '打羽毛球', '打乒乓球', ''],
    "money": [200.0, 240.0, 290.0, 300.0],
    "weight": [140.5, 120.8, 169.4, 155.6],
    "test01": [1, 2.123456789, 3.123456781011126, 4.123456789109999],
    "test02": [1, 2.123456789, 3.123456781011126, 4.123456789109999],
}
data_frame = pd.DataFrame(contents)

# T1: create categorical data directly with pd.Categorical.
# NOTE(review): 'thin' is not listed in `categories`, so pandas stores it as
# NaN -- confirm that this is the intended demonstration.
weight_mark = pd.Categorical(
    values=['thin', 'medium', 'medium', 'fat'],
    categories=['medium', 'fat'],
)
print(weight_mark)

# T2: add a categorical column dynamically by binning 'age02' into two
# classes around its mean, using the min/mean/max summary statistics.
col_age_des = data_frame['age02'].describe()
age_ranges = [
    col_age_des.loc['min'] - 1,   # widen the lower edge so the minimum is included
    col_age_des.loc['mean'],      # split point between the two classes
    col_age_des.loc['max'] + 1,   # widen the upper edge past the maximum
]
# Values up to and including the mean are 'Minors'; values above it are 'Adults'.
age_labels = ['Minors', 'Adults']
data_frame['age02_mark'] = pd.cut(
    data_frame['age02'],
    bins=age_ranges,
    labels=age_labels,
)
print(data_frame)
# 到此這篇關於詳細介紹在pandas中創建category類型數據的幾種方法的文章就介紹到這了,更多相關pandas創建category內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以後多多支持腳本之家!