import csv

import pandas as pd

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)
# keep only the columns needed for counting views
view_data = df1.drop(["timestamp", "user_email", "user_name"], axis=1)
movie_df = pd.merge(df2, view_data, on="movie_id")
# count the view rows per movie
movie_grouped = movie_df.groupby(['movie_name', 'movie_id']).agg({'no_of_views': 'count'}).reset_index()
grouped_sum = movie_grouped['no_of_views'].sum()
movie_grouped['percentage'] = movie_grouped['no_of_views'].div(grouped_sum) * 100
# join the per-movie counts back to the raw view rows, keeping one row per movie
final_out = pd.merge(movie_grouped, view_data, on="movie_id")
final_up = final_out.drop_duplicates(subset='movie_id', keep='first')
# both merge inputs carry 'no_of_views', so the suffixed helper columns get dropped
final = final_up.drop(["no_of_views_x", "no_of_views_y", "percentage"], axis=1)
# exporting the recommendations to CSV, named by studio id
final.to_csv('public/csv/' + str(studio_id) + '.csv')
# re-opening the exported CSV to load it into MongoDB (newline='' as the csv module expects)
f = open('public/csv/' + str(studio_id) + '.csv', 'r', newline='')
# MongoDB collection handle
col = connect_db(server).recomendations
# reading the CSV back as one dict per row
reader = csv.DictReader(f)
# the header fields we want to keep
fieldnames = ("movie_name", "movie_id", "user_id", "customer_id", "store_id")
# clearing any previous recommendations for this customer/store before reloading
if mode == 1:
    col.delete_many({"customer_id": str(studio_id)})
else:
    col.delete_many({"store_id": str(studio_id)})
# loop to keep only the desired fields from each CSV row and insert them into MongoDB
for each in reader:
    row = {field: each[field] for field in fieldnames}
    col.insert_one(row)
f.close()
# cleaning the dataframes (iloc[0:0] returns a new empty frame, so reassign)
df1 = df1.iloc[0:0]
df2 = df2.iloc[0:0]
# deleting the CSV after exporting to the database
# os.remove('public/csv/'+str(studio_id)+'.csv')
print('Training Done !!')
This is the algorithm and model-training part of the overall operation: it converts the query objects to DataFrames, aggregates the view data with the content views, and saves the result to MongoDB.
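The script assumes a connect_db(server) helper that returns a database handle. As a minimal sketch of what that helper could look like with pymongo's MongoClient (the URI format and the database name video_db here are assumptions, not part of the original code):

from pymongo import MongoClient

def connect_db(server):
    # Hypothetical helper: 'server' is assumed to be a MongoDB URI,
    # e.g. 'mongodb://localhost:27017'. The database name 'video_db'
    # is a placeholder; the original only shows the 'recomendations'
    # collection hanging off whatever connect_db returns.
    client = MongoClient(server)
    return client.video_db

For larger exports, collecting the rows into a list and calling col.insert_many(rows) once would also avoid one network round trip per document.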