python
murata htmlの文字変換(K,M →数字)
def main():
    """Expand K/M-suffixed numbers in one HTML file of the working directory.

    Takes the first ``*.html`` file that ``glob`` reports in the current
    working directory, reads it as UTF-16, and rewrites every number that
    carries a ``K`` or ``M`` suffix as a plain number with two decimals
    (``4.7K`` -> ``4700.00``, ``2M`` -> ``2000000.00``).  The converted text
    is written to ``after/<name>`` and the untouched original is then moved
    to ``before/<name>``; both folders are created when missing.

    Raises:
        FileNotFoundError: if the working directory contains no ``*.html``
            file.
    """
    import re
    import glob
    import os
    import shutil

    # Pick the HTML file to process from the working directory.
    html_files = glob.glob('*.html')
    if not html_files:
        raise FileNotFoundError("no *.html file found in " + os.getcwd())
    file_name = html_files[0]

    # Absolute paths of the source file and of the two archive folders.
    path = os.path.join(os.getcwd(), file_name)
    before = os.path.join(os.getcwd(), "before")
    after = os.path.join(os.getcwd(), "after")

    # Create the folders; existing ones are left alone.
    os.makedirs(before, exist_ok=True)
    os.makedirs(after, exist_ok=True)

    multipliers = {"K": 1000, "M": 1000000}
    # Digits, an optional decimal part, then exactly one K or M.  The dot is
    # escaped and the character class holds no comma, so text such as "1,"
    # or "1xK" is not treated as a number.  No word boundary is required
    # around the match, so "5KB" is still rewritten as "5000.00B".
    number_with_suffix = re.compile(r'\d+(?:\.\d*)?[KM]')

    def km_change(text):
        """Return ``text`` (e.g. ``"1.5K"``) scaled by its suffix, 2 decimals."""
        # Strip the suffix instead of substituting a digit for it, so that
        # integer values such as "15K" are not inflated tenfold.
        return format(float(text[:-1]) * multipliers[text[-1]], '.2f')

    # Read everything and close the file before any output is produced.
    with open(path, 'r', encoding="utf-16") as source:
        data_lines = source.read()

    # One pass over the text: each match is converted exactly once, and an
    # already converted value can never be matched again.
    data_lines = number_with_suffix.sub(
        lambda match: km_change(match.group(0)), data_lines)

    # Destination of the converted file inside the "after" folder.
    changed_file = os.path.join(after, file_name)
    # Destination of the original file inside the "before" folder.
    before_file = os.path.join(before, file_name)

    with open(changed_file, mode="w", encoding="utf-16") as f:
        f.write(data_lines)

    # Move the original file into the "before" folder.
    shutil.move(path, before_file)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Output
fdd
Was this helpful?
Similar Posts
- murata フォルダ内のファイルをcsvにまとめる
- murata ファイルを拡張子ごとにフォルダへ振り分け
- murata 途中 要改善 pyautoguiを使ったExcelからテキストファイルへの転記
- murata excel転記
- murata not public netkeiba scraping
- murata not public netkeiba data processing function
- murata excel 1行目にヘッダー見出しをつける関数