o
    h`                  
   @   s*  d dl Zd dlZd dlZz
d dlmZ dZ	W n e
y#   dZdZ	Y nw d dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZ ddlmZmZ ejded	 z	ejd
dd W n e
yr Z zW Y dZ[ndZ[ww G dd dZ dd Z!dd Z"dd Z#dd Z$e%dkre"  dS dS )    NTF)Counter)		RandomMABEpsilonGreedyMABUpperConfidenceBoundMABThompsonSamplingMABContextualMABVotingEnsembleMABWeightedEnsembleMABDynamicEnsembleMABExpertEnsembleMAB   )RecommenderLoggerperformance_trackerignore)categorygymnasium_env/RecommendationMABz@rl_recommender.UnifiedRecommendationEnv:UnifiedRecommendationEnv)identry_pointc                   @   sh   e Zd ZdZdddZdd Zddd	ZdddZe	 ddddfddZ
dd ZdddZdd ZdS )MABSimulationz&Framework for running MAB experiments.epsilon_greedyNc              	   C   sz   || _ i | _tdd| _d | _|| _|pi | _|   | jjdt	|j
t|ddt|ddt|ddd||d	d
 d S )NZmab_simulation)namez$MAB Simulation framework initializedmax_armszN/An_usersn_suggestions)r   r   r   )Zenvironment_typeZenvironment_configalgorithm_typealgorithm_paramsextra)environmentresultsr   loggercurrent_algorithmr   r   _initialize_algorithminfotype__name__getattr)selfr   r   r    r(   f/Users/divyeshpatel/Desktop/sahana/Recommender/recommender_rl/recommender/rl_recommender/simulation.py__init__>   s    



zMABSimulation.__init__c                 C   s
  zt | jdd}t | jdd}t|tr|dkrd}t|tr#|dkr%d}| jd| d|  | jdkr=|||| _nS| jd	krR| j	d
d}t
|||d| _n>| jdkrg| j	dd}t|||d| _n)| jdkrst||| _n| jdkr| j	d
d}t|||d| _nt
||dd| _| jjd| j d| jjj||dd W dS  ty } zQ| jdt|  z|dd| _| jd W n) ty } z| jdt|  ddlm} |dd| _W Y d}~nd}~ww W Y d}~dS W Y d}~dS d}~ww )z/Initialize the default algorithm based on type.r   2   r      r   z#Initializing algorithm with n_arms=z, n_suggestions=randomr   epsilon皙?r.   Zucbc       @r1   ZthompsonZ
contextual皙?zInitialized z
 algorithm)Zalgorithm_classn_armsr   r   zError initializing algorithm: z*Fallback to RandomMAB algorithm successfulz Fallback algorithm also failed: )r   
   r   N)r&   r   
isinstanceintr    r#   r   r!   r   getr   r   r   r   	__class__r%   	Exceptionerrorstrrl_recommender.mabalgorithmsr   )r'   r5   r   r   r.   r1   eZfallback_errorr(   r(   r)   r"   U   sT   




z#MABSimulation._initialize_algorithmc                 C   s   || _ |pi | _|   | jS )z'Set a new algorithm for the simulation.)r   r   r"   r!   )r'   r   r   r(   r(   r)   set_algorithm   s   
zMABSimulation.set_algorithmr6      r   Tc           +         sP  t   }| jjt| ||||d i }| D ]u\}}	|r*| jd| d g }
g }g }g }g }d}t|	trog }g  g }g }g }t	|D ]}|	
  g }g }g }g }g }d}t	|D ]q}| j
 \}}|d}d}t	|D ]Wtjjd| jjdd} |	| }!t|!dr|! nt|!}!| j|!\}}"}}}#|#d	g }$|	|!|"gt|!  ||" ||"7 }|| ||! ||$ ||"7 }qt|| q`||  | || || || qJtj|dd
 }
tj dd
 }tj|dd
 }g }t	t|d D ]#fdd|D }%dd |%D }&t|&dd d }'||' qg }g }(|D ]})|(|) q;t	t|D ]fdd|(D }$|tj|$ qJt fddt	|D }n|	
  t	|D ]}| j
 \}}|d}d}t	|D ]dkr| j|!\}}"}}}#|#d	g }$t|	tr|	j|!|"gt|! |d n|	|!|"gt|!  tjj| jj|	jdd} t|	tr|	|| }!n|	| }!t|!dr|! nt|!}!| j|!\}}"}}}#|#d	g }$t|	tr|	j|!|"gt|! |d n|	|!|"gt|!  |
|" ||"7 }|| ||! ||$ ||"7 }q|| qw|ri|d td|d  dkri| jd|d  d| dt|d |	|
||||||||  |	j ! tj"|	j#|	j t$|	j#|	j dkdd
||< q|| _%t   | }*| jj&||*t|||d |S )aN  
		Run MAB experiment comparing different algorithms.

		Args:
			algorithms (dict): Dictionary of algorithm_name -> algorithm_instance
			n_users (int): Number of users to simulate
			n_steps (int): Number of recommendations per user
			random_runs (int): Number of random runs for each algorithm
			verbose (bool): Print progress
		)
algorithmsr   n_stepsrandom_runsverbosezRunning z algorithm...r   user_idr6   )sizetolistevent_types)axisc                       g | ]}|  qS r(   r(   ).0Zrun_selectionsstepr(   r)   
<listcomp>       z0MABSimulation.run_experiment.<locals>.<listcomp>c                 S   s   g | ]	}|D ]}|qqS r(   r(   )rL   Zsublistitemr(   r(   r)   rO          r   c                    rK   r(   r(   )rL   eventsrM   r(   r)   rO      rP   c                    s    g | ]} | r | d  qS )r(   )rL   run)cumulative_rewards_all_runsr(   r)   rO     s     )contextFrG   replace   z
Completed /z users, avg reward: .4f)outwhere)
	algorithmtotal_rewardscumulative_rewardsarm_selectionsuser_rewardsrI   final_total_rewardaverage_reward
arm_countsZarm_avg_rewards)r   durationZn_algorithmsr   rC   )'timer    log_simulation_startlistkeysitemsr#   r7   r   rangeresetr   r9   npr-   randint
n_productsselect_multiple_armshasattrrH   rN   updatelenappendextendmeanr   most_commonchoicer   r5   maxrf   copydividearm_rewards
zeros_liker   log_simulation_complete)+r'   rB   r   rC   rD   rE   
start_timer   alg_namer_   r`   ra   rb   rc   Zevent_types_allcumulative_rewardZtotal_rewards_all_runsZarm_selections_all_runsZuser_rewards_all_runsZevent_types_all_runsrU   Ztotal_rewards_runZcumulative_rewards_runZarm_selections_runZuser_rewards_runZevent_types_runZcumulative_reward_runZuser_idxobs_rF   Zuser_total_rewardinput_arms_idxarmsrewardr#   rI   Zstep_selectionsZflat_selectionsry   Zall_event_typesZrun_event_typesrg   r(   )rV   rN   r)   run_experiment   s   


















"*
"zMABSimulation.run_experimentc              
      s  z|| _ |js| | |du r|j}|du rt|j}t|t|}t	|t
r0||| n|| t |krB d|  n%t |k rg fdd|D }|rgtjj||t  dd}t |g tt d|  | jjd| d  | |jjt t|dd	  W S  ty }	 zE| jjd
t|	 ||r|jjndt|	dd	 |durt|dkrtjj|t|pdt|dd}
|
 W  Y d}	~	S g W  Y d}	~	S d}	~	ww )a  
		Invoke the algorithm to get recommendations for a user.
		
		Args:
			algorithm: MAB algorithm instance
			event_df (pd.DataFrame): DataFrame containing recent user feedback events
			input_arms_idx: Available arms to choose from (if None, uses all arms)
			obs: Contextual information for contextual algorithms
			user_id: User identifier for logging
			n_recommendations: Number of recommendations to return (defaults to algorithm.n_suggestions)
		
		Returns:
			list: List of recommended item indices
		Nc                    s   g | ]}| vr|qS r(   r(   )rL   armrecommendationsr(   r)   rO   }  s    z.MABSimulation.invoke_agent.<locals>.<listcomp>FrX   z#Generated recommendations for user : )rF   r   r_   n_recommendationsZinput_arms_countr   zError in invoke_agent: None)rF   r_   r<   r   r,   )r!   emptyupdate_algorithmr   ro   aranger5   minru   r7   r   rr   r-   rz   concatenaterj   setr    r#   r:   r%   r;   r<   r=   rH   )r'   r_   event_dfr   r   rF   r   Zremaining_arms
additionalr?   fallbackr(   r   r)   invoke_agentO  sP   




 zMABSimulation.invoke_agentc                 C   s  |j r| jd dS | D ]\}}z|d}|d}|d}|dtj }ddd	d
d}||d}	t| dr| j	durt
|ttjfrOt|ntt|}
d|
  kra| j	jk rn n0| j	|
g|	g | jjd| d| d| d|	 ||||	|
t|dr| nt|dd n| jd|
 d| j	j d n| jd W q ty } z&| jjd| dt| t|dr| nt|t|dd W Y d}~qd}~ww dS )z
		Update the algorithm based on event data.
		
		Args:
			event_df (pd.DataFrame): DataFrame containing user feedback events
				Expected columns: user_id, item_id, event_type, timestamp
		z)No events to process for algorithm updateNrF   item_id
event_type	timestampr/   gQ?g      ?g      )viewcartpurchaseremove_from_cartg        r!   r   z#Updated algorithm with event: user=z, item=z, event=z	, reward=	isoformat)rF   r   r   r   	arm_indexr   r   z
Arm index z" out of bounds for algorithm with z armsz$No current algorithm set for updatesz$Error updating algorithm with event r   to_dict)eventr<   )r   r    r#   iterrowsr9   pd	Timestampnowrs   r!   r7   r8   ro   integerr=   r5   rt   r   warningr;   r<   r   )r'   r   r   r   rF   r   r   r   Zreward_mappingr   r   r?   r(   r(   r)   r     sL   


$  	zMABSimulation.update_algorithm   r6   Fmab_experiment_results.pngc                    s  t s
 jd dS  js jd dS tjdd|d\}}|d } j D ]\}}|j|d |dd	 q(|d
 |	d |
d |  |jddd |d }t j }	 fdd|	D }
|j|	|
dd}|d |	d |jddd |
d t||
D ]\}}|j| | d  | d |dddd q|d }t j|	d   d! }t|}dt|	 }t j D ]\}\}}|j|||  |d! |d|d" q|d# |	d$ |
d% |||t|	d&  d   |d'd t|D  |  |d( }d)d  j D }|j||	d* |
d+ |	d, |jddd |jddd t  |rFt | dS t!  dS )-z(
		Plot comparison of MAB algorithms.
		z/Matplotlib not available; skipping plot_resultsNz,No results to plot. Run an experiment first.   )figsize)r   r   ra   g?)labelalphaz
Time StepszCumulative RewardzCumulative Rewards Over TimeT333333?)r   )r   r   c                    s   g | ]	} j | d  qS )re   )r   )rL   r   r'   r(   r)   rO     rR   z.MABSimulation.plot_results.<locals>.<listcomp>ffffff?Z
AlgorithmszAverage Rewardx-   )rJ   rotationzAverage Reward per AlgorithmgMbP?r\   centerbottom)hava)r   r   r   rf   )widthr   r   z	Arm IndexzSelection CountzArm Selection Frequencyr   c                 S   s   g | ]}| qS r(   r(   )rL   ir(   r(   r)   rO   	  s    )r   r   c                 S   s   g | ]}|d  qS )rc   r(   )rL   resultr(   r(   r)   rO     rP   )tick_labelszReward Distribution per UserzTotal Reward per User)"MATPLOTLIB_AVAILABLEr    r   r   pltsubplotsrl   plot
set_xlabel
set_ylabel	set_titlelegendgridrj   rk   bartick_paramsziptextget_x	get_width
get_heightru   ro   r   	enumerate
set_xticksset_xticklabelsrm   valuesboxplottight_layoutsavefigshow)r'   r   savefilenamefigaxesaxr   r   Z	alg_namesavg_rewardsbarsr   valuer5   r   r   r   Zuser_rewards_datar(   r   r)   plot_results  s`   





2
"




zMABSimulation.plot_resultsc                 C   s"  | j s| jd dS | jd | jd | jd t| j  dd dd	}t|d
D ]Z\}\}}| jd| d|  | jd|d d | jd|d d | jdt|d d t	|d dd ddd }| jd| d|d |  d q.| jd dS )z
		Print summary statistics.
		z1No results to summarize. Run an experiment first.Nz=
============================================================zMAB SIMULATION RESULTS SUMMARYz<============================================================c                 S   s   | d d S )Nr   re   r(   )r   r(   r(   r)   <lambda>*  s    z-MABSimulation.print_summary.<locals>.<lambda>T)keyreverser   
z. z   Average Reward: re   r\   z   Total Reward: rd   z.2fz   Std Dev (per user): rc   rf   rT   z   Top 3 Arms: z
 (counts: ))
r   r    r   r#   sortedrl   r   ro   stdargsort)r'   Zsorted_resultsrankr   r   top_armsr(   r(   r)   print_summary  s    $zMABSimulation.print_summary)r   N)N)r6   rA   r   T)r   Fr   )r%   
__module____qualname____doc__r*   r"   r@   r   r   	DataFramer   r   r   r   r(   r(   r(   r)   r   ;   s    

2
 AM
:Fr   c                 C   s  t | |t| |ddt| |ddt| |ddt| |g}t | |t| |ddt| |t| |ddt| |ddt| |ddt| |ddt| |ddt| |d	dt| || d
dt| || ddt| || ddt| || ddt| || dddt	| || ddd}|S )z,Create all available algorithms for testing.r/   r0   r   r2   r3   r4   g      ?r   g?majorityrB   voting_methodweightedrankedg{Gz?)rB   weight_update_raterA   g?)rB   window_sizeswitch_threshold)rB   learning_rate)RandomzUCB (c=2.0)Thompson SamplingzContextual MABu   Epsilon-Greedy (ε=0.1)u   Epsilon-Greedy (ε=0.3)u   Epsilon-Greedy (ε=0.5)u   Epsilon-Greedy (ε=0.7)u   Epsilon-Greedy (ε=0.9)zVoting Ensemble (Majority)zVoting Ensemble (Weighted)zVoting Ensemble (Ranked)zWeighted EnsemblezDynamic EnsemblezExpert Ensemble)
r   r   r   r   r   r   r|   r	   r
   r   )r5   r   Zbase_algorithmsrB   r(   r(   r)   create_all_algorithms9  s.   
r   c                  C   sd   d} d}d}d}t jdd| |dd}t| |}t|}|j|||d	d
}|  |jdd	dd dS )z)Run comprehensive test of all algorithms.rA   r,   rZ   d   r   dataset/filtered_data.csv*   datar   r   seedTr   rC   rE         zcomprehensive_mab_results.pngr   r   r   Ngymmaker   r   r   r   r   r5   r   r   rC   Zmab_envrB   
simulationr   r(   r(   r)   run_comprehensive_test[     
r  c                  C   s   d} d}d}d}t jdd| |dd}|jd	 t| |t| |d
dt| |ddt| |t| |t| |t| |ddt| |ddgddd}t	|}|j
|||dd}|  dS )z$Run test with visualization enabled.r      r   rA   r   r   r   r   humanr4   r0   r2   r3   r/   r   r   )r   u   Epsilon-Greedy (ε=0.2)ZUCBr   zVoting EnsembleTr  N)r	  r
  	unwrappedZset_render_moder   r   r   r   r   r   r   r   r  r(   r(   r)   run_visualization_testx  s4   r  c                  C   sd   d} d}d}d}t jdd| |dd}t| |}t|}|j|||d	d
}|  |jdddd dS )z!Run quick test of all algorithms.r6   r   r,   r+   r   r   r   r   Fr  r  Tzquick_mab_results.pngr  Nr  r  r(   r(   r)   run_quick_test  r  r  __main__)&	gymnasiumr	  numpyro   pandasr   matplotlib.pyplotpyplotr   r   r;   warningsrh   collectionsr   r>   r   r   r   r   r   r   r	   r
   r   r    r   r   filterwarningsUserWarningregisterr?   r   r   r  r  r  r%   r(   r(   r(   r)   <module>   sJ    ,
    "'
