-
Install dependencies using pip from requirement.txt file in Python
pip install -r requirements.txt
-
Create python virtual environment command
virtualenv venv -p python3
-
Clone conda environment
conda create --name myclone --clone base
-
To get url param details from get request in Django
server = request.GET.get('server_info','production')
count = request.GET.get('limit_count')
user_id = request.GET.get('user_id')
customer_id = request.GET.get('alie_customer_id',None)
studio_id = request.GET.get('studio_id',None)
print(count)
print(user_id)
print(customer_id)
print(studio_id)
-
First highest views based recommendation system in python part 1
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)
view_data = df1.drop(["timestamp","user_email","user_name"],axis=1)
movie_df = pd.merge(df2, view_data, on="movie_id")
movie_grouped = movie_df.groupby(['movie_name','movie_id']).agg({'no_of_views': 'count'}).reset_index()
grouped_sum = movie_grouped['no_of_views'].sum()
movie_grouped['percentage'] = movie_grouped['no_of_views'].div(grouped_sum)*100
movie_final = pd.DataFrame(movie_grouped)
count = pd.DataFrame(movie_final)
final_out = pd.merge(count, view_data, on="movie_id")
final_up =final_out.drop_duplicates(subset='movie_id', keep='first', inplace=False)
final = final_up.drop(["no_of_views_x","no_of_views_y","percentage"],axis=1)
# converting it to csv with store id
final_csv = final.to_csv('public/csv/'+str(studio_id)+'.csv')
# opening the exported csv file to save to mongodb
f = open('public/csv/'+ str(studio_id) +'.csv','rU')
#db URI
col = connect_db(server).recomendations
#reading csv
reader = csv.DictReader(f)
#askign desired header
fieldnames=("movie_name","movie_id","user_id","customer_id","store_id")
# loop to dismantle the dictionary as per the
if(mode == 1):
coly = connect_db(server).recomendations.delete_many({"customer_id":str(studio_id)})
else:
coly = connect_db(server).recomendations.delete_many({"store_id":str(studio_id)})
# colx = connect_db(server).recomendations.delete_many({"store_id_x":"101"})
for each in reader:
row={}
for field in fieldnames:
row[field] = each[field]
col.insert(row)
# cleaning the dataframes
df1.iloc[0:0]
df2.iloc[0:0]
#deleting the csv after exporting to database
# os.remove('public/csv/'+str(studio_id)+'.csv')
print('Training Done !!')
-
Text Editor with Python
from tkinter import Tk,Menu,filedialog,END,messagebox
import tkinter.scrolledtext as ScrolledText
#Global File Name
filename = 'Untitled.txt'
#root for main window
root = Tk(className = " Text Editor Noob")
root.iconbitmap(r'icon.ico')
root.columnconfigure(0, weight=1)
root.rowconfigure(0, weight=1)
textArea = ScrolledText.ScrolledText(root,width=100, height =80)
#FUNCTIONS
#Open file function
def openFile():
file =filedialog.askopenfile(parent=root,title="Please Select a file to open",filetypes=(("Text File","*.txt"),("All Files","*")))
if file != None:
contents = file.read()
textArea.insert('1.0',contents)
file.close()
#Save as File function
def fileSaveAs():
file = filedialog.asksaveasfile(mode="w",defaultextension=".txt")
if file != None:
#slice of the last character from get,as an extra return (enter) is added
data = textArea.get('1.0',END+'-1c')
file.write(data)
file.close()
#Exit Function
def quitroot():
if messagebox.askyesno("Quit Text Editor Noob","Are you sure you want to quit?Because Biswas will mind if you quit X0X0"):
root.destroy()
#About function
def About():
label = messagebox.showinfo("About TextEditor Noob","An alternative notepad made with the help of Python and Developed By Biswas Sampad Satpathy")
#Menu options
menu = Menu(root)
root.config(menu=menu)
#top Menu
fileMenu = Menu(menu)
menu.add_cascade(label=" File",menu = fileMenu)
#sub menus
fileMenu.add_command(label="New")
fileMenu.add_command(label="Open",command = openFile)
fileMenu.add_command(label="Save As",command = fileSaveAs)
fileMenu.add_command(label="Print")
fileMenu.add_separator()
fileMenu.add_command(label="Close",command = quitroot)
#top menu
helpMenu = Menu(menu)
menu.add_cascade(label="Help",menu=helpMenu)
helpMenu.add_command(label="Help")
#top menu
aboutMenu = Menu(menu)
menu.add_cascade(label="About",command=About)
textArea.pack()
#keep window open
root.mainloop()
-
Pattern
for i in range (0,5):
for j in range (0,i+1):
print("* ",end="")
print()
-
# Python Program - Pattern Program 2
k=1
for i in range(0,5):
for j in range(0,k):
print("* ",end=" ")
k=k+2
print()
-
# Python Program - Convert Hexadecimal to Binary
print("Enter 'x' for exit."):
hexdec =input("Enter any number in Hexadecimal Format: "):
if hexadec == 'x':
exit():
else:
dec=int(hexdec,16):
print(hexdec,"in Binary ="bin(dec)):
-
Python - String , Check If Word Is In string
if word in mystring:
print ('success')
# or
if 'seek' in 'those who seek shall find':
print('Success!')
-
Python - regex , replace multiple spaces with one space
# Substituting multiple spaces with single space
# replace multiple spaces with one space
# delete spaces
# spaces delete
# tabs replace with one space
# tow ore more spaces betwen words
import re
text = "That is a test for removing multiple spaces"
document = re.sub(r's+', ' ', text, flags=re.I)
print(document)
-
Python - regex,replace all character exept A-Z a-z and numbers
# Remove all the special characters
# replace all character exept A-Z a-z
import re
text = "That is a # ; '' 2 45 6 ?/..,,, test for removing multiple spaces"
document = re.sub(r'W', ' ', text)
print(document) # as we can see this regex expresion do not remove numbers
-
Python - regex , remove all single characters,replace all single chars,char
# remove all single characters
# replace all single chars,char
import re
text = "That is a test for removing multiple spaces"
document = re.sub(r's+[a-zA-Z]s+', ' ', text)
print(document)
-
Python - String ,remove,delete,replace,extract numbers from string
# remove numbers from string
s = '12abcd405'
result = ''.join([i for i in s if not i.isdigit()])
-
Python - String, renove all single character for text
# renove all single character for text
text = "t is a c test for removing multiple spaces"
document =' '.join( [w for w in text.split() if len(w)>1] )
print(document)
-
first python code
print(1)
-
Pandas - How to check whether a pandas DataFrame is empty
if df.empty:
print('DataFrame is empty!')
-
Pandas - Append multiple pandas data frames at once
new_df = pd.concat([df1, df2, df3, df4, df5])
#ore
new_df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
-
Pandas - Delete,Remove,Drop, column from pandas DataFrame
# Opiton one
del df['column_name']
# The best way to do this in pandas is to use drop:
df = df.drop('column_name', 1)
# where 1 is the axis number (0 for rows and 1 for columns.) To delete the column without having to reassign df you can do:
df.drop('column_name', axis=1, inplace=True)
# Finally, to drop by column number instead of by column label, try this to delete, e.g. the 1st, 2nd and 4th columns:
df = df.drop(df.columns[[0, 1, 3]], axis=1) # df.columns is zero-based pd.Index this is called drop by index
# Another way is:
columns = ['Col1', 'Col2', ...]
df.drop(columns, inplace=True, axis=1)
-
Python Measure the execution time of small bits of Python code with the timeit module
>>> import timeit
>>> timeit.timeit('"-".join(str(n) for n in range(100))',
number=10000)
0.3412662749997253
>>> timeit.timeit('"-".join([str(n) for n in range(100)])',
number=10000)
0.2996307989997149
>>> timeit.timeit('"-".join(map(str, range(100)))',
number=10000)
0.24581470699922647
-
Function argument unpacking in Python
def myfunc(x, y, z):
print(x, y, z)
tuple_vec = (1, 0, 1)
dict_vec = {'x': 1, 'y': 0, 'z': 1}
>>> myfunc(*tuple_vec)
1, 0, 1
>>> myfunc(**dict_vec)
1, 0, 1
-
[Python] Use json.dumps() to pretty-print Python dicts
# The standard string repr for dicts is hard to read:
>>> my_mapping = {'a': 23, 'b': 42, 'c': 0xc0ffee}
>>> my_mapping
{'b': 42, 'c': 12648430. 'a': 23} # 😞
# The "json" module can do a much better job:
>>> import json
>>> print(json.dumps(my_mapping, indent=4, sort_keys=True))
{
"a": 23,
"b": 42,
"c": 12648430
}
# Note this only works with dicts containing
# primitive types (check out the "pprint" module):
>>> json.dumps({all: 'yup'})
TypeError: keys must be a string
-
[Python] Namedtuples can be a great alternative to defining a class manually
# Using namedtuple is way shorter than
# defining a class manually:
>>> from collections import namedtuple
>>> Car = namedtuple('Car', 'color mileage')
# Our new "Car" class works as expected:
>>> my_car = Car('red', 3812.4)
>>> my_car.color
'red'
>>> my_car.mileage
3812.4
# We get a nice string repr for free:
>>> my_car
Car(color='red' , mileage=3812.4)
# Like tuples, namedtuples are immutable:
>>> my_car.color = 'blue'
AttributeError: "can't set attribute"
-
[Python] The get() method on Python dicts and its "default" arg
# The get() method on dicts
# and its "default" argument
name_for_userid = {
382: "Alice",
590: "Bob",
951: "Dilbert",
}
def greeting(userid):
return "Hi %s!" % name_for_userid.get(userid, "there")
>>> greeting(382)
"Hi Alice!"
>>> greeting(333333)
"Hi there!"
''' When "get()" is called it checks if the given key exists in the dict.
If it does exist, the value for that key is returned.
If it does not exist then the value of the default argument is returned instead.'''
-
[Python] Different ways to test multiple flags at once in Python
# Different ways to test multiple
# flags at once in Python
x, y, z = 0, 1, 0
if x == 1 or y == 1 or z == 1:
print('passed')
if 1 in (x, y, z):
print('passed')
# These only test for truthiness:
if x or y or z:
print('passed')
if any((x, y, z)):
print('passed')
-
[Python] F-strings format syntax
>>> F"Hello, {name}. You are {age}."
'Hello, Eric. You are 74.'
-
[Python] F-strings format syntax call functions
>>> name = "Eric Idle"
>>> f"{to_lowercase(name)} is funny."
'eric idle is funny.'
>>> f"{name.lower()} is funny."
'eric idle is funny.'
-
[Python] Pytest fixture example with cleanup and teardown
# Deafult scope=function
@pytest.fixture
def browser():
# Initialize Chrome driver
driver = Chrome()
# Wait implicitly for elements to be ready before attempting interactions
driver.implicitly_wait(10)
# Return the driver object at the end of setup
yield driver
# For cleanup, quit the driver
driver.quit()
-
[Python] Using comprehension expression to convert list of dictionaries to nested dictionary
# yaml file used for account
- sitename: testcomp
email: test@outlook.com
pass: 111
role: admin
name: admin
- sitename: testcomp
email: mvsubmit@gmail.com
pass: 111
role: edit
name: user1
- sitename: testcomp
email: test2@outlook.com
pass: 11
role: edit
name: user2
# convert.py
# read yaml file into faccounts
faccounts = yaml.load(file, Loader=yaml.FullLoader)
# Convert list of dictionaries to nested dictionary so that we can select any value by user name and key nam
account = {account['name']: account for account in faccounts}
return account
-
[Python] Inheritance and adding variable. Page object example
# page object of Dashboard page. SitePAge - parent class
class SiteDashboardPage(SitePage):
PAGE_TITLE = (By.CLASS_NAME, 'page-title')
def __init__(self, browser):
SitePage.__init__(self, browser)
self.page_title = self.browser.find_element(*self.PAGE_TITLE).text
-
[Python] F-string format datetime
import datetime
now = datetime.datetime.now()
print(f'{now:%Y-%m-%d %H:%M}')
# The example displays a formatted current datetime. The datetime format specifiers follow the : character.
# This is the output.
2019-05-11 22:39
-
[Python] Create a list of objects using class names
# In Python, the name of the class refers to the class instance. Consider:
class A: pass
class B: pass
class C: pass
lst = [A, B, C]
# instantiate second class
b_instance = lst[1]()
print(b_instance)
-
[Python] Range - standard function to create a list of numbers
for a in range(1,11):
print("a is ", a)
-
[Python] Slice of tuple
mytuple = (10,20,30,45,60,70)
for x in mytuple[0:3]:
print(x)
print(============)
# iterate through first 2 numbers
for x in mytuple[:2]:
print(x)
print(============)
# iterate through last (6-4)=2 numbers
for x in mytuple[4:]:
print(x)
print(============)
mystring= "abcdefghijkl"
# iterate through string symbols starting fron 2nd, skip every 2 symbols
for x in mystring[2::2]:
print(x)
-
[Python] Sort a list
mylist = [900, 20, 300, 5, 90]
mylist.sort()
for x in mylist:
print(x)
-
[Python] Function returns multiple values
def my_func():
return 1,2
x,y = my_func()
print(x)
print(y)
-
[Python] Function parameters - pack and unpack
# Asterisk turns parameter into tuple reference
def average(*nums):
result = sum(nums)/len(nums)
return result
# Example 1
x = average(10,20,30)
print('Example 1(pack): ',x)
# Example 2
x = average(10,20,30,40,50)
print('Example 2(pack): ', x)
# Asterisk unpacks tuple before sending to the function
# Example 3
mytuple = (10,20,30)
x = average(*mytuple)
print('Example 3(unpack): ', x)
-
[Python] Eval() function
# The eval() method parses the expression passed to this method
# and runs python expression (code) within the program.
x = 1
print(eval('x + 1'))
-
[Python] Save (write) data to file
myfile = open('hello.txt', 'w')
myfile.write('Hello world')
myfile.close()
-
[Python] Two ways to import a module
# Option 1: we have to specify 'sys'
import sys
if len(sys.argv) == 1:
print("No arguments specified")
print("This is the name of the script:", sys.argv[0] )
# Option 2
from sys import argv
if len(argv) == 1:
print("No arguments specified")
print("This is the name of the script:", argv[0] )
-
[Python] Get nodeid, module, function name using Pytest request built-in fixture
# to enable logging add to pytest.ini the following:
# [pytest]
#log_cli = 1
# log_level = INFO
# log_cli_level = INFO
# log_cli_format = %(asctime)s %(levelname)s %(message)s
# log_cli_date_format = %H:%M:%S
import pytest
import logging
# Example 1
def test_node_id(request):
node = request.node.nodeid
logging.info(f"\nThis is node: {node}")
module_name = request.module.__name__
logging.info(f"\nThis is module name: {module_name}")
function_name = request.function.__name__
logging.info(f"\nThis is function name: {function_name}")
fspath = request.fspath
logging.info(f"\nThis is fspath : {fspath}")
# Example 2
def test_func_node_id(request):
req = request
def foo(r):
node = r.node.nodeid
logging.info(f"
This is function node: {node}")
foo(req)
-
[Python] Default value of function argument
# Example demonstrates usage of function arguments default values
def foo(filename='testid', flag = True, number = None):
pdf_file = filename + ".pdf"
print(f'\n Filename = {pdf_file}, Flag = {flag}, number = {number}')
if __name__ == '__main__':
foo()
foo("readme", False, 4)
-
[Python] Simple yield generator example
# Generator function
def generator(count):
i = 0
while i < count:
yield i
i += 1
def test_generator():
for value in generator(4):
print("\nGenerated value :", value)
pass
-
[Python] Using inspect.stack() to get module name, path, function name
import pytest
import logging
import inspect
def test_inspect():
caller = inspect.stack()[1]
logging.info(f"\nThis is caller[3]: {caller[3]}")
def foo():
caller = inspect.stack()[1]
logging.info(f"\nThis is caller[3] inside function: {caller[3]}")
for item in caller:
logging.info(f"\nThis is caller item: {item}")
foo()
-
Demo
class Demo:
def __init__(self):
self.a = 10
self.b = 20
def get_total_of_a_b:
total = self.a + self.b
return total
-
murata htmlの文字変換(K,M →数字)
def main():
import re
import glob
import os
import shutil
# 同フォルダ内のhtmlファイル名を取得
file_name = glob.glob('*.html')[0]
# 対象ファイルの絶対パスを取得
path = os.path.join(os.getcwd(),file_name)
# beforeフォルダの絶対パスを取得
before = os.path.join(os.getcwd(),"before")
# afterフォルダの絶対パスを取得
after = os.path.join(os.getcwd(),"after")
# フォルダの新規作成(既にある場合は作らない)
if not os.path.exists(before):
os.makedirs(before)
if not os.path.exists(after):
os.makedirs(after)
s = open('./'+file_name, 'r+',encoding="utf-16")
data_lines = s.read()
x=re.findall(r'[1-9]{1,4}.?[0-9]{0,8}[K,M]',data_lines)
def km_change(text):
if "K" in text:
changed = format(float(text.replace("K","0"))*1000, '.2f')
elif "M" in text:
changed = format(float(text.replace("M","0"))*1000000, '.2f')
return changed
for i in x:
data_lines=data_lines.replace(i,km_change(i))
# afterフォルダへの絶対パスを取得
changed_file=os.path.join(after,file_name)
# beforeフォルダへの絶対パスを取得
before_file=os.path.join(before,file_name)
with open(changed_file, mode="w", encoding="utf-16") as f:
f.write(data_lines)
s.close()
# 元ファイルをbeforeフォルダへ移動
shutil.move(path, before_file)
if __name__=='__main__':
main()
-
murata フォルダ内のファイルをcsvにまとめる
import os
import csv
import time
csv_file = 'filelist.csv'
date_format = '%Y/%m/%d %H:%M:%S'
file_list = []
for file in os.listdir("."):
# ファイルかどうか
is_file = os.path.isfile(file)
# このpyファイル自身でないか
not_py_file = os.path.basename(__file__) != file
# リストCSVファイルでないか
not_csv_file = csv_file != file
if is_file and not_py_file and not_csv_file:
# ファイル作成時刻
time_crt = time.strftime(date_format,
time.localtime(os.path.getctime(file)))
# ファイル更新時刻
time_mod = time.strftime(date_format,
time.localtime(os.path.getmtime(file)))
file_list.append([file, time_crt, time_mod])
with open(csv_file, "w", newline="") as f:
csv_writer = csv.writer(f)
for r in file_list:
csv_writer.writerow(r)
-
murata ファイルを拡張子ごとにフォルダへ振り分け
import os
import pathlib
import shutil
# 「Untitled.html」というようなファイル名+拡張子のリスト用
file_list = []
# after_file_list = []
# . が含まれる者すべてを抽出
for file in os.listdir("."):
# ファイルかどうかを判定し、フォルダ等を除外
is_file = os.path.isfile(file)
# このpyファイル自身でないか
# jupyter notebookの場合、エラーになるので、直接ファイル名入力
not_this_file = os.path.basename(__file__) != file
if is_file and not_this_file:
file_list.append(file)
# after_file_list.append(after_file_name)
current_dir = os.getcwd()
for target_file in file_list:
# 拡張子を抽出
target_ext = target_file.split(".")[-1]
# 拡張子が付いた新しいフォルダパスを作成
new_folder_path = os.path.join(current_dir,target_ext)
# 現在のファイルパスを作成
current_file_path = os.path.join(current_dir,target_file)
# 新しいファイルパスを作成
new_file_path = os.path.join(new_folder_path,target_file)
# 新しく拡張子名のフォルダを作成。
os.makedirs(new_folder_path,exist_ok=True)
# ファイルを移動。(既存のファイルがあってもエラーにならないように、絶対パスで記述)
shutil.move(current_file_path, new_file_path)
-
murata 途中 要改善 pyautoguiを使ったExcelからテキストファイルへの転記
import pyautogui as pg
import time
import pyperclip
x,y=pg.locateCenterOnScreen('./picture_for_gui/yuubinn.png',confidence=0.1)
for i in range(10):
if i == 0:
pg.click(1223,1064,duration=0.5)
pg.moveTo(x,y,duration=1)
pg.press('down')
pg.hotkey('ctrl','c')
time.sleep(1)
pg.click(1305,1045,duration=0.5)
pg.click()
time.sleep(1)
pg.hotkey('return')
pg.typewrite('yuubinnbangou;\n')
pg.hotkey('ctrl','v')
pg.click(1223,1064,duration=0.5)
pg.press('tab')
pg.hotkey('ctrl','c')
pg.click(1305,1045,duration=0.5)
pg.hotkey('return')
pg.typewrite('jyuusyo;\n')
pg.hotkey('ctrl','v')
continue
pg.click(1223,1064,duration=0.5)
pg.click()
pg.hotkey('ctrl','left')
pg.press('down')
pg.hotkey('ctrl','c')
time.sleep(1)
pg.click(1305,1045,duration=0.5)
pg.click()
time.sleep(1)
pg.hotkey('return')
pg.typewrite('yuubinnbangou;\n')
pg.hotkey('ctrl','v')
pg.click(1223,1064,duration=0.5)
pg.press('tab')
pg.hotkey('ctrl','c')
pg.click(1305,1045,duration=0.5)
pg.hotkey('return')
pg.typewrite('jyuusyo;\n')
pg.hotkey('ctrl','v')
-
murata excel転記
# masters_copy.py
import openpyxl
# 1) エクセルファイルを開く
wb = openpyxl.load_workbook("sample.xlsx")
ws = wb["住所"]
# 重要住所の行
masters_rows = []
header_row = ws[1]
# 2) エクセルファイルを1行ずつ読み込み(2行目から)、
for row in ws.iter_rows(min_row=2):
# マークの付いている行だけをリストに入れておく
if row[2].value == "〇":
masters_rows.append(row)
masters_rows.insert(0,header_row)
# 3) 新規シートを作成する
ws2 = wb.create_sheet(title="重要住所")
# 4) 新規シートにリストに入れておいた行を転記する
for master_row in masters_rows:
ws2.append([r.value for r in master_row])
# 3列目削除
ws2.delete_cols(3)
wb.save("sample.xlsx")
wb.remove(ws)
# 5) エクセルファイルを保存する
wb.save("重要住所.xlsx")
-
Add comments in python
#Write comment text here
print("Hello World 1!")
print("Hello World 2!") #OR like this
"""
This is a multiline comment
You can add as much lines as you want
"""
print("Hello World 3!")
-
If else conditions python
x = 50
y = 30
if x > y:
print("x is greater than y")
elif x == y:
print("x equals to y")
else:
print("y is greater than x")
-
For loop python
#LOOP THROUGH LIST
names = ["John", "Sonia", "Rakesh"]
for item in names:
print(item)
#LOOP THROUGH DICTIONARY
my_dict = {
"name": "John",
"title": "Programmer",
"lang": "Python"
}
for key in my_dict:
print(key)
-
Create a function in python
def function_name():
print("Hello this is a function")
#To call the above function use
function_name()
-
Arbitrary Arguments *args in python
def favourite_fruit(*args):
print("My favourite fruit is : " + args[1])
favourite_fruit("Apple", "Mango", "Banana")
-
Python collection - List in python
list = ["pen", "pencil", "sharpner", "eraser", "board"]
print(list[2]) #print third element sharpner
print(list[:3]) #print items start index to 0 and less than index 3
print(list[1:3]) #print list start from index 1 and less than index 3
print(list[-1]) #print last element
-
murata not public netkeiba scraping
import pandas as pd
import time
# プログレスバーの表示
from tqdm import tqdm_notebook as tqdm
# 対象のurlの構造分析
# 'https://db.netkeiba.com/race/202006030301'
# ex.
# 202006030301について
# 2020 → 西暦
# 06 → 開催場 ('札幌', '函館', '福島', '新潟', '東京', '中山', '中京', '京都', '阪神', '小倉')
# 03 → 上記開催場の何回目
# 03 → 上記開催場の上記回の何日目
# 01 → レース番号
race_id_list = []
# 開催上
for place in range(1,11,1):
# ~回
for kai in range(1,6,1):
# ~日
for day in range(1,9,1):
# ~R
for r in range(1,13,1):
race_id = "2019"+ str(place).zfill(2)+str(kai).zfill(2)+str(day).zfill(2) + str(r).zfill(2)
race_id_list.append(race_id)
# 途中中断し、再開した時を想定し、読み込み済みの結果を第2引数に
def scrape_race_results(race_id_list,pre_race_results={}):
# 既に読み込み済の結果を反映
race_results = pre_race_results
# tqdmにより、進捗を表示させる
for race_id in tqdm(race_id_list):
# レースIDが読み込み済のリスト内にある場合は、ここで処理を中断し、for文へ戻す
if race_id in race_results.keys():
print(race_id)
continue
try:
url = "https://db.netkeiba.com/race/" + race_id
race_results[race_id] = pd.read_html(url)[0]
time.sleep(1)
# 該当のIDにレース情報がなかった場合にも中断しないようにする。
except IndexError:
continue
# スクレイピング自体を中断しても、race_resultsを返してくれるようにする。
except:
break
return race_results
# テスト中断して実行するときに、左辺と右辺をそれぞれ1ずつずらして実行する(ex. test3 = ~ test2)
test2 = scrape_race_results(race_id_list,test)
# 取得した各レースの結果データ(今は辞書型でそれぞれ分かれている)をつなげる処理
# つなげた際に識別できるように、IDをレースIDにしておく。
# 全てのレースIDを一つずつ取り出し、
for key in test.keys():
# 各着版ごとにつけられているインデックスをレースIDに変換。長さは各レースのデータフレームの長さ(馬の数)分だけ置き換える。
test[key].index = [key]*len(test[key])
# 全てをつなげ、1つのデータフレームにまとめる。行が入れ替わらないようにsort=Falseを設定。
results = pd.concat([test[key] for key in test.keys()], sort=False)
# pickle形式で保存
results.to_pickle('results.pickle')
# csvで保存
results.to_csv("results.csv", encoding='utf_8_sig')
-
Python collection - Tuples in python
#Tuples are unchangable means you can not modify its items
fruits = ("banana", "orange", "apple", "grapes")
print(fruits[2]) #print third element apple
print(fruits[:3]) #print items start index to 0 and less than index 3
print(fruits[1:3]) #print tuple start from index 1 and less than index 3
print(fruits[-1]) #print last element
-
Python collection - sets in python
my_set = {"item_1", "item_2", "item_3"}
"""
sets are unordered and unindexed
so it can not be accessed by using index
"""
for (item in my_set):
print(item)
my_set.add("item_4") #To add single item to set
my_set.update(["item_4", "item_5"]) #To add multiple items to set
my_set.remove("item_3") #To remove an item
del my_set #To remove set
-
Python collection - Dictionaries in python
my_dict = {
"first_name": "John",
"last_name": "Deo",
"occupation": "programmer"
}
print(my_dict["first_name"]) #print first_name value of dictionary
print(my_dict.get("first_name")) #same as above
my_dict["first_name"] = "Josef" #To change first_name value
my_dict.values() #return values of dictionary
my_dict["nick_name"] = "JDo" #To add key value to dictionary
my_dict.pop("last_name") #To remove a key value pair from dictionary based on key
del my_dict["last_name"] #Same as above
my_dict.clear() #empty a dictionary
my_dict.copy() #To copy a dictionary
-
murata not public netkeiba data processing function
# 前提
# スクレイピングしたデータをresults.pickleというファイル名で取得済
import pandas as pd
# データの読み込み
results=pd.read_pickle('results.pickle')
def preproccessing(results):
# 元のデータを変更しないために、コピー
df = results.copy()
# 着順に数字以外の文字列が含まれているものを取り除く
df = df[~(df['着順'].astype(str).str.contains("\D"))]
# 整数型に変換
df['着順'] = df['着順'].astype(int)
# 性齢を「性」「年齢」に分ける
df['性'] = df['性齢'].map(lambda x:str(x)[0])
df['年齢'] = df['性齢'].map(lambda x:str(x)[1:]).astype(int)
# 馬体重を「体重」と「体重変化」に分ける
df["体重"] = df["馬体重"].str.split('(', expand= True)[1].str[:-1].astype(int)
df["体重変化"] = df["馬体重"].str.split('(', expand= True)[1].str[:-1].astype(int)
# 単勝の型をfloat型に変換
df['単勝'] = df["単勝"].astype(float)
# 不要な列を削除
df.drop(['タイム','着差','調教師','性齢','馬体重'],axis=1,inplace=True)
return df
-
murata excel 1行目にヘッダー見出しをつける関数
# osh シートオブジェクト名と仮定
def print_header():
osh["A1"].value = "担当者"
osh["B1"].value = "数量"
osh["C1"].value = "金額"
osh["D1"].value = "得意先"
osh["E1"].value = "数量"
osh["F1"].value = "金額"
-
How to install packages using pip according to the requirements.txt file from a local directory?
pip install -r /path/to/requirements.txt
-
murata 正規表現 M30 (2003.11.01 - 2019.12.01)の抽出(桁数と改良余地あり)
text = "barabrara M30 (2003.11.01 - 2019.12.01) brarbrarbab"
re.findall(r'[A-Z][0-9]+ \([0-9]+\.[0-9]+\.[0-9]+ - [0-9]+\.[0-9]+\.[0-9]+\)',text)[0]
-
html hsdata_file_name_change(途中)
import re
import glob
import os
import shutil
import csv
# 同フォルダ内のhtmlファイル名を取得
file_name = glob.glob('*.csv')[0]
if "~" not in file_name:
print("実行する")
# ここにコードを記述
f = open(file_name)
reader = csv.reader(f)
l = [row for row in reader]
beginning=l[1][0]
ending=l[-1][0]
beginning.split("/")
start_date=beginning.split("/")[2] + "-" + beginning.split("/")[0] + "-" + beginning.split("/")[1]
ending.split("/")
end_date=ending.split("/")[2] + "-" + ending.split("/")[0] + "-" + ending.split("/")[1]
start_date + "~" + end_date
new_file_name=file_name.replace("FromDownloader","")
new_file_name=new_file_name.replace("_BidAndAsk","")
new_file_name = new_file_name + "["+start_date + "~" + end_date+"]"
f.close()
os.rename(file_name,new_file_name)
else:
print("実行しない")
-
murata private 複数エクセルファイル内の値をまとめて1つのCSVファイルに出力する
import pathlib # 標準ライブラリ
import openpyxl # 外部ライブラリ pip install openpyxl
import csv # 標準ライブラリ
# ワークブックを作成
new_workbook = openpyxl.Workbook()
# デフォルトで作成されるワークシートを選択
new_sheet = new_workbook.active
new_row = 1
# ..\ → このプログラムの親フォルダを示す。(.\だけだとカレントフォルダを示す。)
path = pathlib.Path("..\data\targetfolder") #相対パス指定
# path.iterdir() → ディレクトリ内のファイル・サブディレクトリ一覧を取得
for all_obj in path.iterdir():
# 拡張子がxlsxかを判定
if all_obj.match("*.xlsx"):
# ワークブックの読み込み //最初からglobを使用すればいいのでは?
wb = openpyxl.load_workbook(all_obj)
# ワークブックからシートを順に読み込み
for target_sheet in wb:
#シートの9行目から18行目までを操作するように指示 //xlupのように始まりと終わりが分からない場合に対応させるには?
for target_row in range(9,19):
# 対象行の2番目の列に、データが入力されいるかを調べる。
if target_sheet.cell(target_row, 2).value != None:
#より説明的なコード
#new_sheet.cell(row=new_row, column=1).value = \
# target_sheet.cell(row=2, column=7).value #伝票NO
#対象ファイルの2行目7列目を、作成したシートの対象行のlist_row行目の1列目に転記
new_sheet.cell(new_row, 1).value = target_sheet.cell(2, 7).value #伝票NO
# 以下同様の考え
new_sheet.cell(new_row, 2).value = target_sheet.cell(3, 7).value #日付
new_sheet.cell(new_row, 3).value = target_sheet.cell(4, 3).value #得意先コード
new_sheet.cell(new_row, 4).value = target_sheet.cell(7, 8).value #担当者コード
#対象ファイルのdt_row行目の1列目を、作成したシートのlist_row行目の1列目に転記
new_sheet.cell(new_row, 5).value = target_sheet.cell(target_row, 1).value #No
#以下同様
new_sheet.cell(new_row, 6).value = target_sheet.cell(target_row, 2).value #商品コード
new_sheet.cell(new_row, 7).value = target_sheet.cell(target_row, 3).value #商品名
new_sheet.cell(new_row, 8).value = target_sheet.cell(target_row, 4).value #数量
new_sheet.cell(new_row, 9).value = target_sheet.cell(target_row, 5).value #単価
# 「\」 → 行継続の記号
# 対象ファイルのdt_row行目の4列目と5列目の積を、作成したシートのlist_row行目の10列目に転記
new_sheet.cell(new_row, 10).value = target_sheet.cell(target_row, 4).value * \
target_sheet.cell(target_row, 5).value #金額
new_sheet.cell(new_row, 11).value = target_sheet.cell(target_row, 7).value #備考
# 新しいデータ下に追加できるように、list_rowに1を加算
new_row += 1
#new_workbook.save("..\data\sales\salesList.xlsx")
# ファイルの保存
# 引数として、ファイル名、モード、エンコーディングを指定
with open("..\newList.csv","w",encoding="utf_8_sig") as fp:
writer = csv.writer(fp, lineterminator="\n")
# 作成したシートの各行を取得
for row in new_sheet.rows:
# 各行から各列を取り出し、リストにする。
# その1行分のリストをwriter.writerow()により、csvとして出力する。
writer.writerow([col.value for col in row])
-
Transfer libs to requirements.txt (Automatically create requirements.txt)
pip freeze > requirements.txt
-
test_2
print("haelo")
-
test_3
print("This the second example")
-
Hello World
print("Hello World")
-
Check python version on Mac and Windows
python -V
-
Pandas DataFrame to CSV
# Use custom separator sep
df.to_csv('file_name.csv',sep='\t')
# Write without index index
df.to_csv('file_name.csv',index=False)
# Write without headers header
df.to_csv('file_name.csv',header=False)
# Write subset of columns columns
df.to_csv('file_name.csv',columns=['col_A','col_B'])
# Change file encoding format encoding
df.to_csv('file_name.csv',encoding='utf-8')
# Change NaNs as Unknown na_rep
df.to_csv('file_name.csv',na_rep='Unknown')
-
Evaluate,check before class feald values set, bothe constructor,property option
class Product(object):
def __init__(self, price = 0.0, name = ""):
self.price = price
self.name = name
# property for __price attribute
@property
def price(self):
return self.__price
@price.setter
def price(self, value):
if value < 0:
raise ValueError("Price cannot be negative")
self.__price = value
# property for __name attribute
@property
def name(self):
return self.__name
@name.setter
def name(self, value):
for ch in value:
if ch.isdigit():
raise Exception("Enter valid product name")
self.__name = value
-
Jupyter Notebooks
#store variables
%store stream_whole
#read variables
%store -r stream_whole
-
Multiple constructors in python
class Cheese(object):
def __init__(self, num_holes=0):
"defaults to a solid cheese"
self.number_of_holes = num_holes
@classmethod
def random(cls):
return cls(randint(0, 100))
@classmethod
def slightly_holey(cls):
return cls(randint(0, 33))
@classmethod
def very_holey(cls):
return cls(randint(66, 100))
# Now create object like this:
gouda = Cheese()
emmentaler = Cheese.random()
leerdammer = Cheese.slightly_holey()
# another example
class C(object):
def __init__(self, fd):
# Assume fd is a file-like object.
self.fd = fd
@classmethod
def fromfilename(cls, name):
return cls(open(name, 'rb'))
# Now you can do:
c = C(fd)
# or:
c = C.fromfilename('a filename')
-
Join two tuples in python
tuple1 = (1, 2, 3, 4)
tuple2 = (5, 6, 7, 8)
tuple3 = tuple1 + tuple2
print(tuple3)
-
PyQt5 Fixed Window Size
class MyDialog(QtWidgets.QDialog):
def __init__(self):
super(MyDialog, self).__init__()
self.setFixedSize(640, 480)
-
Integer to String conversion in Python
intVal = 10
strVal = str(intVal)
-
convert pyqt5 resource file .qrs to python file?
pyrcc5 -o resources.py resources.qrc
-
test url
test_url = 'http://httpbin.org/status/404'
-
Check S3 Bucket
import os
import boto3
from botocore.exceptions import ClientError
client = boto3.client(
's3',
# Hard coded strings as credentials, not recommended.
aws_access_key_id='',
aws_secret_access_key=''
)
s3 = boto3.resource(
's3',
# Hard coded strings as credentials, not recommended.
aws_access_key_id='',
aws_secret_access_key=''
)
# print(bucket_subfolder)
bucket = 'bucket-name'
for my_bucket_object in s3.Bucket(bucket).objects.all():
print(my_bucket_object)
-
Join two videos using moviepy in Python
from moviepy.editor import VideoFileClip, concatenate_videoclips
video_1 = VideoFileClip("path/to/first/video.mp4")
video_2 = VideoFileClip("path/to/second/video.mp4")
final_video= concatenate_videoclips([video_1, video_2], method="compose")
final_video.write_videofile("path/to/final/video.mp4")
-
Cut a portion of a video using moviepy
# pip install moviepy
# pip install imageio-ffmpeg
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
ffmpeg_extract_subclip("path/to/full/video.mp4", 30, 100, targetname="path/to/final/video.mp4")
# 30 - start second
# 100 - end second
-
Float to int Python
float_val = 12.98;
print( int(float_val) );
# 12
-
Get tofixed of a value using round() Python
val = 12.89
val1 = round(val, 0)
val2 = round(val, 1)
print(val1)
print(val2)
# 13.0
# 12.9
-
List all files of a directory using os Python
from os import listdir
from os.path import isfile, join
dirPath = "path/to/my/directory"
allFileAndFolders = listdir(dirPath)
allFiles = [f for f in allFileAndFolders if isfile(join(dirPath, f))]
-
While loop python
counter = 1
while counter < 6:
print(counter)
counter += 1
#WITH ELSE STAEMENT
while counter < 6:
print(counter)
counter += 1
else:
print("counter is greater or equals to 6");
-
Function in python
#SIMPLE FUNCTION
def function_name():
print("Hello world")
function_name() #CALL FUNCTION
# FUNCTION WITH ARGUMENTS
def function_with_args(first_name, last_name):
print("First name is " + first_name + ". Last name is" + last_name)
function_with_args("John", "Deo") #CALL FUNCTION WITH ARGUMENTS
#CALL WITH KEYWORDS ARGUMENTS
#ARGUMENT ORDER DOES NOT MATTER
function_with_args(last_name = "Deo", first_name = "John")
#WITH ARBITARY ARGUMENTS
#USE WHEN NUMBER OF ARGUMENT PASSED IS UNKNOWN
def function_with_arbitary_args(*args):
print("Third item is " + args[2])
function_with_arbitary_args("Bat", "Bowl", "Hockey") #WILL PRINT 'HOCKEY'
#WITH ARBITARY KEYWORD ARGUMENTS
def function_with_arbitary_args(*args):
print("Third item is " + args["last_name"])
function_with_arbitary_args(first_name = "Rick", last_name = "Bowl")
-
Practice
#import Pynput library
import Pynput
#import mouse from pynput
from pynput import mouse
#import controller from mouse
from pynput.mouse import Controller
mouse = Controller()
print(mouse.position)
-
Sqlalchemy basic db migration commands Python
python manage.py db init #INITIALIZE NEW MIGRATION
python manage.py db migrate #TO CREATE MIGRATION
python manage.py db upgrade #APPLY MIGRATION CHANGES ON DB
-
murata (xlwing使用)空白があるテキストファイル(数行)を読み込んで、excelに貼り付ける
xlwing 使用
テキストファイルの中身
test.txt
テスト test t3 USD 44
te 55 99 gbe 99
pythonファイル
hello.py
def import_txt_split():
# ブックの読み込み
wb = xw.Book.caller()
# シートの読み込み
sht = wb.sheets['Sheet1']
# テキストファイルの読み込み
f = open('test.txt', 'r',encoding='UTF-8')
# 各行をリストで取得
datalist = f.readlines()
# 空白を取り除く
s = [i.split() for i in datalist]
# 1行ずつ、リストをペッと貼り付ける
for num,i in enumerate(s):
sht.cells(num+1,1).value = i
# テキストファイルを閉じる
f.close()
VBA側
Sub import_text()
RunPython ("import hello; hello.import_txt_split()")
End Sub
このVBAをボタンにマクロ登録。クリック
結果
qita1.png
上記の方法以外に、pandasのdataframeに貼り付けて、A1セルにぺっと貼り付ける方法でも出来た
-
murata xlwing python任意の平均値、標準偏差、行数、列数のテーブル作成関数
@xw.func
@xw.ret(expand = "table")
def randomstd(mean, std, rows, columns):
return np.random.normal(mean, std, (int(rows), int(columns)))
-
Check PyTorch GPU Availability
In [1]: import torch
In [2]: torch.cuda.current_device()
Out[2]: 0
In [3]: torch.cuda.device(0)
Out[3]: <torch.cuda.device at 0x7efce0b03be0>
In [4]: torch.cuda.device_count()
Out[4]: 1
In [5]: torch.cuda.get_device_name(0)
Out[5]: 'GeForce GTX 950M'
In [6]: torch.cuda.is_available()
Out[6]: True
-
Single line if else condition python
def hellPrint():
myvar = "1"
message = "Condition is true" if myvar == "1" else "Condition is false"
print(message)
hellPrint()
-
Using ternary operator in Python
condition_value = 1
val = 1 if condition_value == 1 else 2
print val