python
murata not public netkeiba data processing function
# Prerequisite
# The scraped data has already been saved to a file named results.pickle
import pandas as pd
# Load the previously scraped race results from disk.
# Unpickling can run arbitrary code, so only read a file you produced yourself.
results = pd.read_pickle("results.pickle")
def preproccessing(results):
    """Clean a scraped race-results table and return it as a new DataFrame.

    The input is never modified; all work happens on a filtered copy.

    Processing steps:
      1. Drop rows whose finishing position ('着順') contains any non-digit
         character, then convert that column to int.
      2. Split the combined sex/age column ('性齢') into '性' (first
         character) and '年齢' (remaining characters, as int).
      3. Split the horse-weight column ('馬体重'), expected to look like
         ``"<weight>(<change>)"``, into '体重' and '体重変化' (both int).
      4. Convert the win odds column ('単勝') to float.
      5. Drop the columns 'タイム', '着差', '調教師', '性齢' and '馬体重'.

    Args:
        results: DataFrame containing at least the columns '着順', '性齢',
            '馬体重', '単勝', 'タイム', '着差' and '調教師'.

    Returns:
        A new DataFrame with the cleaned and derived columns.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a remaining value cannot be parsed as a number.
    """
    # NOTE(review): the column names below were restored from mis-encoded
    # (mojibake) text; confirm they match the headers of the pickled data.

    # Filter first and copy once, so the column assignments below operate on
    # an independent frame (no chained-assignment warnings, input untouched).
    # The raw string keeps "\D" from being read as an invalid escape sequence.
    has_non_digit = results['着順'].astype(str).str.contains(r"\D")
    df = results.loc[~has_non_digit].copy()

    # Finishing position is purely numeric for the remaining rows.
    df['着順'] = df['着順'].astype(int)

    # Sex/age: first character is the sex, the rest is the age.
    sex_age = df['性齢'].astype(str)
    df['性'] = sex_age.str[0]
    df['年齢'] = sex_age.str[1:].astype(int)

    # Horse weight: piece 0 is the body weight, piece 1 is the change with a
    # trailing ")" that is stripped before conversion.
    weight_parts = df["馬体重"].str.split('(', expand=True)
    df["体重"] = weight_parts[0].astype(int)
    df["体重変化"] = weight_parts[1].str[:-1].astype(int)

    # Win odds as float.
    df['単勝'] = df["単勝"].astype(float)

    # Remove columns that are no longer needed.
    return df.drop(columns=['タイム', '着差', '調教師', '性齢', '馬体重'])
Was this helpful?
Similar Posts
- murata not public netkeiba scraping
- Target database is not up to date - Alembic
- Error - Building wheel for backports.zoneinfo (pyproject.toml) did not run successfully [Solved]
- murata htmlの文字変換(K,M →数字)
- murata フォルダ内のファイルをcsvにまとめる
- murata ファイルを拡張子ごとにフォルダへ振り分け
- murata 途中 要改善 pyautoguiを使ったExcelからテキストファイルへの転記