python

Highest-views-based recommendation system in Python — Part 1

# Build a "most viewed" recommendation table from raw view logs (data1) and
# content metadata (data2), export it to CSV, and load it into MongoDB.
# NOTE(review): relies on names defined outside this snippet
# (data1, data2, studio_id, mode, server, connect_db) — presumably bound
# by the enclosing training routine; confirm against the full file.
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Keep only the columns needed for aggregation.
view_data = df1.drop(["timestamp", "user_email", "user_name"], axis=1)

# Join view events with the movie catalogue on movie_id.
movie_df = pd.merge(df2, view_data, on="movie_id")

# Count views per movie and express each count as a percentage of the total.
movie_grouped = (
    movie_df.groupby(["movie_name", "movie_id"])
    .agg({"no_of_views": "count"})
    .reset_index()
)
grouped_sum = movie_grouped["no_of_views"].sum()
movie_grouped["percentage"] = movie_grouped["no_of_views"].div(grouped_sum) * 100

# Re-attach user/customer/store columns, keeping one row per movie, then
# drop the helper columns produced by the aggregation and the merge suffixes.
# (The original wrapped movie_grouped in two redundant pd.DataFrame() copies;
# it is already a DataFrame, so merge it directly.)
final_out = pd.merge(movie_grouped, view_data, on="movie_id")
final_up = final_out.drop_duplicates(subset="movie_id", keep="first", inplace=False)
final = final_up.drop(["no_of_views_x", "no_of_views_y", "percentage"], axis=1)

# Export the recommendation table as a CSV keyed by the studio/store id.
csv_path = "public/csv/" + str(studio_id) + ".csv"
final.to_csv(csv_path)

# Target collection (db URI resolved by connect_db).
col = connect_db(server).recomendations

# Clear any previous recommendations for this customer/store before reloading.
if mode == 1:
    col.delete_many({"customer_id": str(studio_id)})
else:
    col.delete_many({"store_id": str(studio_id)})

# Re-read the exported CSV and insert one document per row, keeping only
# the desired header fields.
# FIX: 'rU' mode was removed in Python 3.11; open with newline="" as the
# csv module recommends, and use a context manager so the handle is closed.
fieldnames = ("movie_name", "movie_id", "user_id", "customer_id", "store_id")
with open(csv_path, "r", newline="") as f:
    reader = csv.DictReader(f)
    for each in reader:
        row = {field: each[field] for field in fieldnames}
        # FIX: Collection.insert() was removed in pymongo 4.x; insert_one()
        # is the supported single-document insert.
        col.insert_one(row)

# Clean the dataframes.
# FIX: the original `df1.iloc[0:0]` discarded its result (a no-op);
# rebind so the large frames are actually released.
df1 = df1.iloc[0:0]
df2 = df2.iloc[0:0]

# Deleting the csv after exporting to database (intentionally disabled).
# os.remove(csv_path)
print('Training Done !!')

This is the algorithm and model-training part of the overall operation: it converts the query objects to DataFrames, aggregates the view data with the content metadata, and saves the results to MongoDB.

Was this helpful?