# 剿
# スクレイピングã—ãŸãƒ‡ãƒ¼ã‚¿ã‚’results.pickleã¨ã„ã†ãƒ•ァイルåã§å–得済
import pandas as pd
# Load the previously scraped race results from the local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files you created.
results = pd.read_pickle("results.pickle")
def preproccessing(results):
    """Clean a scraped race-results table and return it as a new DataFrame.

    The input frame is never modified. Column names are the Japanese headers
    of the scraped table:

    * 着順   - finishing position
    * 性齢   - sex and age in one string (first character = sex, rest = age)
    * 馬体重 - body weight with the change in parentheses, expected to look
      like ``"480(+2)"``
    * 単勝   - win odds
    * タイム / 着差 / 調教師 - time / margin / trainer (dropped)

    Args:
        results: DataFrame containing at least the columns listed above.

    Returns:
        A new DataFrame in which
        * rows whose 着順 contains any non-digit character are removed,
        * 着順 is ``int`` and 単勝 is ``float``,
        * 性 (sex) and 年齢 (age, ``int``) are derived from 性齢,
        * 体重 (weight, ``int``) and 体重変化 (weight change, ``int``) are
          derived from 馬体重,
        * タイム, 着差, 調教師, 性齢 and 馬体重 are dropped.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a value cannot be converted to the target number type.
    """
    # Keep only rows whose finishing position is purely numeric.
    # Taking an explicit copy of the filtered rows protects the caller's frame
    # and avoids pandas' SettingWithCopyWarning on the assignments below.
    is_numeric_rank = ~results['着順'].astype(str).str.contains(r"\D")
    df = results[is_numeric_rank].copy()
    df['着順'] = df['着順'].astype(int)

    # Split 性齢 into sex (first character) and age (remaining characters).
    sex_age = df['性齢'].astype(str)
    df['性'] = sex_age.str[0]
    df['年齢'] = sex_age.str[1:].astype(int)

    # Split 馬体重 on "(": part 0 is the weight itself, part 1 is the change
    # followed by the closing ")" which is stripped before conversion.
    weight_parts = df["馬体重"].str.split('(', expand=True)
    df["体重"] = weight_parts[0].astype(int)
    df["体重変化"] = weight_parts[1].str[:-1].astype(int)

    # Win odds as float.
    df['単勝'] = df["単勝"].astype(float)

    # Drop columns that are no longer needed (raw or already split).
    df = df.drop(columns=['タイム', '着差', '調教師', '性齢', '馬体重'])
    return df
# 0 Comments