o
    qhd                     @   s   d dl Zd dlZd dlmZ ddlmZmZ G dd dZG dd deZ	G d	d
 d
eZ
G dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    N)defaultdict   )RecommenderLoggerperformance_trackerc                   @   sF   e Zd ZdZdddZdd Zdd Zdd
dZdddZdd Z	d	S )MABAlgorithmzBase class for MAB algorithms.r   c                 C   sV   || _ || _t| jj d| dd| _|   | jjd| jj d||dd d S )N_arms)namez
Algorithm z initialized)n_armsn_suggestions)extra)r
   r   r   	__class____name__loggerresetinfo)selfr
   r    r   ]/Users/divyeshpatel/Desktop/sahana/Recommender/recommender_rl/rl_recommender/mabalgorithms.py__init__	   s   zMABAlgorithm.__init__c                 C   s,   t | j| _t | j| _d| _d| _dS )zReset the algorithm's state.r   N)npzerosr
   
arm_countsarm_rewardstotal_stepstotal_pullsr   r   r   r   r      s   
zMABAlgorithm.resetc                 C      t d)zSelect an arm to pull.(Subclasses should implement this method.NotImplementedErrorr   r   r   r   
select_arm      zMABAlgorithm.select_armNc                 C   r   )z,Select multiple arms to pull simultaneously.r   r   )r   input_arms_idxr   r   r   select_multiple_arms    r"   z!MABAlgorithm.select_multiple_armsc                 C   s4   | j j| jjt|dr| n||| j| jd dS )zLog arm selection for tracking.tolist)algorithm_namearms_selectedcontextr
   r   N)r   log_algorithm_selectionr   r   hasattrr%   r
   r   )r   r'   r(   r   r   r   _log_arm_selection$   s   
zMABAlgorithm._log_arm_selectionc              	      sF  t |ttjfrH j|  d7  <  j|  |7  <   jd7  _  jd7  _t jd _ j	j
 jj|g|g j|  j j| d dS t||D ]\}}t|} j|  d7  <  j|  |7  < qM  jd7  _  jt|7  _t jd _ j	j
 jjt|dr| n||t j j fdd|D d dS )z9Update the algorithm's state after pulling multiple arms.r   r   )r&   r   rewardscumulative_rewardstepr   r%   c                    s   g | ]} j | qS r   )r   ).0armr   r   r   
<listcomp>U   s    z'MABAlgorithm.update.<locals>.<listcomp>N)
isinstanceintr   integerr   r   r   r   maxr   
log_rewardr   r   ziplenr*   r%   sumr   r   r,   r0   rewardr   r   r   update.   s:   


zMABAlgorithm.update)r   N)
r   
__module____qualname____doc__r   r   r!   r$   r+   r<   r   r   r   r   r      s    



r   c                       s6   e Zd ZdZd
 fdd	ZdddZddd	Z  ZS )	RandomMABz
    Random MAB algorithm.
    r   Nc                       t  || || _d S r=   )superr   seed)r   r
   r   rD   r   r   r   r   \      
zRandomMAB.__init__c                 C   s>   |d u r| j d u rtj|S tj|d ur|n| j |S r=   )rD   r   randomchoiceRandomState)r   r#   rD   r   r   r   r!   `   s   >zRandomMAB.select_armc                 C   s   |d u r$| j d u r$|d u rtjj| j| jdd}n9tjj|| jdd}n.|d u r>tj|d ur1|n| j j| j| jdd}ntj|d urG|n| j j|| jdd}|tj}t	d| d|  | j
|d|id |S )NFsizereplacezRandomMAB: , rD   r(   )rD   r   rG   rH   r
   r   rI   astypeint64printr+   )r   r#   rD   r   r   r   r   r$   d   s   ,(zRandomMAB.select_multiple_armsr   Nr=   NNr   r>   r?   r@   r   r!   r$   __classcell__r   r   rE   r   rA   X   s
    
rA   c                       6   e Zd ZdZd fdd	ZdddZdd	d
Z  ZS )EpsilonGreedyMABz!Epsilon-Greedy algorithm for MAB.r   皙?c                    rB   r=   )rC   r   epsilonr   r
   r   rY   rE   r   r   r      rF   zEpsilonGreedyMAB.__init__Nc                 C   sz   t j | jk r|d u rt j| jS t j|S t j| j| jt 	| j| jdkd}|d u r4t 
|S |t 
||  S )Nr   outwhere)r   rG   randrY   rH   r
   divider   r   
zeros_likeargmax)r   r#   avg_rewardsr   r   r   r!      s   $
zEpsilonGreedyMAB.select_armc                 C   s  t j | jk r%|d u rt jj| j| jdd}n
t jj|| jdd}d}n?t j| j| j	t 
| j| j	dkd}|d u rNt || j d  }|d d d }nt || | j d  }|d d d }d}|t j}| j||| jdt v ry| nd d	d
 td| d|  |S )NFrJ   Zexplorer   r[   Zexploitrb   )strategyrY   rb   rN   zEpsilonGreedyMAB: rM   )r   rG   r^   rY   rH   r
   r   r_   r   r   r`   argsortrO   rP   r+   localsr%   rQ   )r   r#   r   rd   rb   top_armsr   r   r   r$      s(   $
z%EpsilonGreedyMAB.select_multiple_armsr   rX   r=   rT   r   r   rE   r   rW   ~   s
    
rW   c                       rV   )UpperConfidenceBoundMABz)Upper Confidence Bound algorithm for MAB.r          @c                    rB   r=   )rC   r   c)r   r
   r   rk   rE   r   r   r      rF   z UpperConfidenceBoundMAB.__init__Nc              	   C   s   |d u rt | jD ]}| j| dkr|  S q	n|D ]}| j| dkr'|  S qtj| j| jt| j| jdkd}t| jd}| j	t
tjt|| jt| j| jdkd }|| }|d u rgt|S |t||  S )Nr   r[   r   )ranger
   r   r   r_   r   r`   r5   r   rk   sqrtlogra   )r   r#   r0   rb   safe_total_pulls
confidence
ucb_valuesr   r   r   r!      s"   $4
z"UpperConfidenceBoundMAB.select_armc              	   C   s  g }|d u r<t | jD ]0}| j| dkr;|| t|| jkr1tj|d | j tjd}|  S tj|tjd  S q| jt| }|dkrtj	| j
| jt| j
| jdkd}t| jd}| jttj	t|| jt| j| jdkd }|d u r|| }	n|| ||  }	|d u rt|	| d  }
t||
d d d gtj}|S t|	| | d  }
t||
d d d gtj}|| S tj|tjd}|S )Nr   )dtyper[   r   rc   )rl   r
   r   appendr8   r   r   arrayrP   r_   r   r`   r5   r   rk   rm   rn   re   concatenaterO   )r   r#   Zunexplored_armsr0   r   Zremaining_slotsrb   ro   rp   rq   rg   Zall_armsr   r   r   r$      s6   
	$4
  z,UpperConfidenceBoundMAB.select_multiple_arms)r   rj   r=   rT   r   r   rE   r   ri      s
    
ri   c                       sB   e Zd ZdZd fdd	ZdddZdd	d
Z fddZ  ZS )ThompsonSamplingMABz$Thompson Sampling algorithm for MAB.r         ?c                    s>   t  || || _|| _t|| | _t|| | _d S r=   )rC   r   alphabetar   ones	successesfailures)r   r
   r   rx   ry   rE   r   r   r     s
   zThompsonSamplingMAB.__init__Nc                 C   s6   t j| j| j}|d u rt |S |t ||  S r=   )r   rG   ry   r{   r|   ra   )r   r#   sampled_thetar   r   r   r!     s   
zThompsonSamplingMAB.select_armc                 C   sx   t j| j| j}|d u r t || j d  }|d d d }nt || | j d  }|d d d }|t j}|S )Nrc   )	r   rG   ry   r{   r|   re   r   rO   rP   )r   r#   r}   rg   r   r   r   r   r$     s   z(ThompsonSamplingMAB.select_multiple_armsc                    s   t  || t|ttjfr)|dkr| j|  d7  < d S | j|  d7  < d S t||D ]\}}|dkr@| j|  d7  < q.| j|  d7  < q.d S )Nr   r   )	rC   r<   r2   r3   r   r4   r{   r|   r7   r:   rE   r   r   r<   !  s   zThompsonSamplingMAB.update)r   rw   rw   r=   )	r   r>   r?   r@   r   r!   r$   r<   rU   r   r   rE   r   rv     s    

rv   c                       sL   e Zd ZdZd fdd	ZdddZdd	d
Zd fdd	Zdd Z  Z	S )ContextualMABz$Contextual Bandit algorithm for MAB.r   rX   c                    s<   t   | || _t fdd| _t fdd| _d S )Nc                      
   t  S r=   r   r   r   r
   r   r   <lambda>8     
 z(ContextualMAB.__init__.<locals>.<lambda>c                      r   r=   r   r   r   r   r   r   9  r   )rC   r   rY   r   context_rewardscontext_countsrZ   rE   r   r   r   5  s   zContextualMAB.__init__Nc                 C   s   |d u rd}|  |}t | jk r%|d u rt| jS |tt| S tj| j| | j	| t
| j| j	| dkd}t|dkrhtj| j| jt| j| jdkd}|d u r_t|S |t||  S |d u rqt|S |t||  S )Ndefaultr   r[   )_create_context_keyrG   rY   	randranger
   r8   r   r_   r   r   r   r9   r   r   r`   ra   )r   r(   r#   context_keycontext_avg
global_avgr   r   r   r!   ;  s*   

$

zContextualMAB.select_armc                 C   s  |d u rd}|  |}t | jk r4|d u r"tjj| j| jdd}n
tjj|| jdd}|tj}|S tj	| j
| | j| t| j| j| dkd}t|dkrtj	| j| jt| j| jdkd}|d u rt|| j d  }|d d d }|tj}|S t|| | j d  }|d d d }|tj}|| S |d u rt|| j d  }|d d d }|tj}|S t|| | j d  }|d d d }|tj}|| S )Nr   FrJ   r   r[   rc   )r   rG   rY   r   rH   r
   r   rO   rP   r_   r   r   r   r9   r   r   r`   re   )r   r(   r#   r   r   r   r   rg   r   r   r   r$   Y  sF   

$z"ContextualMAB.select_multiple_armsc                    s   t  || |d u rd}| |}t|ttjfr2| j| |  |7  < | j| |  d7  < d S t	||D ]\}}| j| |  |7  < | j| |  d7  < q7d S )Nr   r   )
rC   r<   r   r2   r3   r   r4   r   r   r7   )r   r   r,   r(   r   r0   r;   rE   r   r   r<     s   
zContextualMAB.updatec                 C   s*   t |trdt|ddd S t|S )z-Create a simple context key from the context.Zstep_Zsession_stepr      )r2   dictmingetstr)r   r(   r   r   r   r     s   
z!ContextualMAB._create_context_keyrh   rS   r=   )
r   r>   r?   r@   r   r!   r$   r<   r   rU   r   r   rE   r   r~   2  s    

4r~   c                       s6   e Zd ZdZd
 fdd	Zdd Z fdd	Z  ZS )EnsembleMABz'Base class for ensemble MAB algorithms.r   Nc                    s^   |pg | _ | j rtt| j t| j  ng | _| j r#tt| j ng | _t || d S r=   )	
algorithmsr   rz   r8   algorithm_weightsr   algorithm_performancesrC   r   )r   r
   r   r   rE   r   r   r     s   
&zEnsembleMAB.__init__c                 C   sH   | j | | j r"tt| j t| j  | _tt| j | _dS dS )z!Add an algorithm to the ensemble.N)r   rs   r   rz   r8   r   r   r   r   	algorithmr   r   r   add_algorithm  s
   zEnsembleMAB.add_algorithmc                    sZ   t    | jr+| jD ]}|  qtt| jt| j | _tt| j| _dS dS )z&Reset the ensemble and all algorithms.N)	rC   r   r   r   rz   r8   r   r   r   r   rE   r   r   r     s   


zEnsembleMAB.resetrR   )r   r>   r?   r@   r   r   r   rU   r   r   rE   r   r     s
    	r   c                       s4   e Zd ZdZd fdd	ZdddZd	d
 Z  ZS )VotingEnsembleMABzw
    Voting-based ensemble MAB algorithm.
    Each algorithm votes for arms, and the most voted arms are selected.
    r   Nmajorityc                       t  ||| || _d S r=   )rC   r   voting_method)r   r
   r   r   r   rE   r   r   r        
zVotingEnsembleMAB.__init__c              	   C   s  | j s.|d u rtjj| j| jdd}|tj}|S tjj|| jdd}|tj}|| S g }| j D ]<}t|dre|d urZt|drZz|	||}W n t
yY   |	|}Y nw |	|}|| q3| g}|| q3t| j}|D ]}|D ]}|t|  d7  < q|qx| jdkrt|| j d  d d d }	n>| jdkrt| j}
t|D ]\}}| j| }|D ]}|
t|  |7  < qqt|
| j d  d d d }	n| |}	|	tj}	|	S )NFrJ   r$   r   r   rc   weighted)r   r   rG   rH   r
   r   rO   rP   r*   r$   	TypeErrorrs   r!   r   r3   r   re   	enumerater   _ranked_voting)r   r(   r#   r   	all_votesr   votesZ	arm_votesvoteselected_armsZweighted_votesiweightr   r   r   r$     sN   




"

"
z&VotingEnsembleMAB.select_multiple_armsc                 C   sd   t | j}|D ]}t|D ]\}}|t|  d|d  7  < qqt || j d ddd S )zImplement ranked voting system.rw   r   Nrc   )r   r   r
   r   r3   re   r   )r   r   
arm_scoresr   rankr   r   r   r   r     s    z VotingEnsembleMAB._ranked_voting)r   Nr   rS   )r   r>   r?   r@   r   r$   r   rU   r   r   rE   r   r     s
    
7r   c                       B   e Zd ZdZd fdd	ZdddZd fd	d
	Zdd Z  ZS )WeightedEnsembleMABz
    Weighted ensemble MAB algorithm.
    Combines arm selection probabilities from all algorithms using weighted averaging.
    r   N{Gz?c                    r   r=   )rC   r   weight_update_rate)r   r
   r   r   r   rE   r   r   r     r   zWeightedEnsembleMAB.__init__c           
   	   C   s4  | j s%|d u rtjj| j| jdd}n
tjj|| jdd}|tj}|S t| j}t	| j D ]I\}}| j
| }t|drd|d ur^t|dr^z|||}W n ty]   ||}Y nw ||}n||g}|D ]}	|t|	  |7  < qlq0t|| j d  d d d }|tj}|d u r|S || S NFrJ   r$   rc   )r   r   rG   rH   r
   r   rO   rP   r   r   r   r*   r$   r   r!   r3   re   )
r   r(   r#   r   r   r   r   r   r   r0   r   r   r   r$     s4   

 z(WeightedEnsembleMAB.select_multiple_armsc              	      s   t  || | jD ]/}t|dr9|dur3t|dr3z	|||| W q
 ty2   ||| Y q
w ||| q
| | dS )zBUpdate ensemble and adjust algorithm weights based on performance.r<   N)rC   r<   r   r*   r   _update_weightsr   r   r,   r(   r   rE   r   r   r<   C  s   

zWeightedEnsembleMAB.updatec                 C   s   | j sdS t| j D ] \}}t|dr*t|dr*t|jt|jd }|| j|< q
t	| jdkrPt
| j}|t	| }d| j | j | j|  | _dS dS )z5Update algorithm weights based on recent performance.Nr   r   r   r   )r   r   r*   r   meanr   maximumr   r   r9   expr   r   )r   r,   r   r   
avg_rewardZexp_performancesZnew_weightsr   r   r   r   U  s   

z#WeightedEnsembleMAB._update_weights)r   Nr   rS   r=   )	r   r>   r?   r@   r   r$   r<   r   rU   r   r   rE   r   r     s    
(r   c                       sB   e Zd ZdZd fdd	Zddd	Zd fd
d	Zdd Z  ZS )DynamicEnsembleMABz|
    Dynamic ensemble MAB algorithm.
    Dynamically selects the best performing algorithm based on recent performance.
    r   N2   rX   c                    sZ   t  ||| || _|| _| jrdd tt| jD ng | _| jr(d| _d S d | _d S )Nc                 S   s   g | ]}g qS r   r   )r/   r   r   r   r   r1   t  s    z/DynamicEnsembleMAB.__init__.<locals>.<listcomp>r   )	rC   r   window_sizeswitch_thresholdr   rl   r8   algorithm_rewardscurrent_best_algorithm)r   r
   r   r   r   r   rE   r   r   r   p  s
   $zDynamicEnsembleMAB.__init__c                 C   s   | j s,|d u rtjj| j| jdd}|tj}|S tjj|| jdd}|tj}|S | j | j }t	|dr[|d urVt	|drVz|
||W S  tyU   |
| Y S w |
|S ||gS )NFrJ   r$   )r   r   rG   rH   r
   r   rO   rP   r   r*   r$   r   r!   )r   r(   r#   r   Zbest_algorithmr   r   r   r$   w  s$   

z'DynamicEnsembleMAB.select_multiple_armsc              	      s   t  || t| jD ]K\}}t|dr=|dur7t|dr7z	|||| W n ty6   ||| Y nw ||| | j| | t| j| | j	krW| j| 
d q|   dS )z;Update ensemble and check if algorithm switching is needed.r<   Nr   )rC   r<   r   r   r*   r   r   rs   r8   r   pop_check_algorithm_switch)r   r   r,   r(   r   r   rE   r   r   r<     s   
zDynamicEnsembleMAB.updatec                 C   s~   t | jdk r	dS g }| jD ]}|rt|}|| q|d qt|}|| }|| j }||| j kr=|| _dS dS )z;Check if we should switch to a better performing algorithm.   Ng        )	r8   r   r   r   r   rs   ra   r   r   )r   Zperformancesr,   Zavg_performanceZbest_idxZbest_performanceZcurrent_performancer   r   r   r     s   




z*DynamicEnsembleMAB._check_algorithm_switch)r   Nr   rX   rS   r=   )	r   r>   r?   r@   r   r$   r<   r   rU   r   r   rE   r   r   j  s    
r   c                       r   )ExpertEnsembleMABze
    Expert ensemble MAB algorithm.
    Uses exponential weighting to combine expert algorithms.
    r   NrX   c                    s<   t  ||| || _| jrtt| j| _d S g | _d S r=   )rC   r   learning_rater   r   r   r8   algorithm_losses)r   r
   r   r   r   rE   r   r   r     s   &zExpertEnsembleMAB.__init__c              	   C   sb  | j s,|d u rtjj| j| jdd}|tj}|S tjj|| jdd}|tj}|S t| j	 | j
 }|t| }t| j}t| j D ]H\}}|| }t|dr{|d urut|druz|||}	W n tyt   ||}	Y nw ||}	n||g}	|	D ]}
|t|
  |7  < qqHt|| j d  d d d }	|	tj}	|d u r|	S ||	 S r   )r   r   rG   rH   r
   r   rO   rP   r   r   r   r9   r   r   r*   r$   r   r!   r3   re   )r   r(   r#   r   weightsr   r   r   r   r   r0   r   r   r   r$     s<   
 z&ExpertEnsembleMAB.select_multiple_armsc              	      s   t  || | jD ]/}t|dr9|dur3t|dr3z	|||| W q
 ty2   ||| Y q
w ||| q
| || dS )z"Update ensemble and expert losses.r<   N)rC   r<   r   r*   r   _update_expert_lossesr   rE   r   r   r<     s   

zExpertEnsembleMAB.updatec                 C   s   | j sdS t| j D ]A\}}t|dr.t|dr)z| }W n   | g}Y n
| }n| g}d}|D ]
}||vrA|d7 }q7| j|  |7  < q
dS )z0Update expert losses based on their predictions.Nr$   r   r   )r   r   r*   r$   r!   r   )r   r   r,   r   r   Zpredicted_armsZlossr0   r   r   r   r     s$   



z'ExpertEnsembleMAB._update_expert_losses)r   NrX   rS   r=   )	r   r>   r?   r@   r   r$   r<   r   rU   r   r   rE   r   r     s    
-r   )numpyr   rG   collectionsr   r   r   r   r   rA   rW   ri   rv   r~   r   r   r   r   r   r   r   r   r   <module>   s    R&;I0wLYT