o
    Ti                     @   sP  d Z ddlmZ ddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ dd Zdd	 Zg Zg Zd
D ]LZeddD ]D\ZZeeeeZdededed  d dfZddddgfZ ee eD ]\Z!Z"e#ee!e"eef e#e! de de  qoqGq@ej$j%ej$j&deedej$&dddgej$&dddgej$&dddgej$&dddgej$&dddgdd  Z'ej$&d!ddgd"d# Z(ej$&d$d%d&gg d'gd(d) Z)ej$&d$d%d&gg d'gd*d+ Z*d,d- Z+d.d/ Z,ej-d0d1 Z.d2d3 Z/d4d5 Z0d6d7 Z1d8d9 Z2ej$&d:g d;ej$&dddgej$&d<g d=ej$&d>ddgej$&d?ddgd@dA Z3ej$&dBddgej$&dCddg dDg dEg dFfddg dGg dHg dIfddg dJg dKg dIfgdLdM Z4ej-dNdO Z5ej$&dPddddQg dRg dSfddddQg dTg dUfddddQg dVg dWfddddXg dYg dWfgdZd[ Z6ej-d\d] Z7ej$&d^ddg d_g d`fddg dag dbfddg dcg ddfddg deg dffgdgdh Z8ej-didj Z9ej$&dkdddgej:g dlg dmdnfdg doeedgeg dpedqdrej;ggg dsg dtg dugg dmdvfgej$&dddgdwdx Z<ej$&d>ddgej$&dyddgej$&dzddQej=g d{ej>d|fddXe=g d}fgd~d Z?dd Z@ej$&d>ddgej$&dzddQej=g d{ej>d|fddXe=g d}fgdd ZAej$&d>ddgej$&dzddQej=g dej>d|fddXe=g dfgdd ZBej$&d>ddgej$&ddg dfdg dfgej$&dzddQej=g dej>d|fddXe=g dfgdd ZCej$&d>ddgej$&dyddgej$&dzddQej=g d{ej>d|fddXe=g d}fgdd ZDej$&dddQg dfddXg dffgdd ZEej$&ddeFdg dfdeFddg g dfgej$&d>ddgdd ZGej$&dddgdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMej$&d!ddgdd ZNdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    )productN)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer	   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r"   f/var/www/html/evchargy.com/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_value_counts.py.tests_value_counts_index_names_category_column   s   
r$   c                 C   s   t ddd}ttjdtd|tjd||tjdd|d |d}| rm|d d	|d< tj	|j
dd d
df< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< |S )Nz
2015-08-24
   )Zperiods   abcdr   )1st2nd3rdr*   float   r(         r)            	   )r
   r   nprandomZdefault_rngchoicelistZintegersr   nanloc)	seed_nansnmdaysframer"   r"   r#   seed_df0   s   r>   TF)d   i  )      rA   r*   r   r&   r(   r)   -zdf, keys, bins, n, m)idsisortTFznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   dd }|||	|
|d}| j ||d}|d jdi |}| j ||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	|
 |
  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r6   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrK   )r   Zarrr"   r"   r#   rebuild_index^   s   z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerG   rH   rI   binsrG   r*   r"   )r   r   applyr	   r   rK   renamerL   r   r   
sort_index)r   keysrQ   r:   r;   rE   rP   r   rG   rH   rI   rO   kwargsgrleftrightr"   r"   r#    test_series_groupby_value_countsT   s   

r\   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]appler`   bananara   orangerb   pear	TimestampFoodr-   re   sr]   unitDatetime1Dfreqkeyrf   r   )r   dropr   r   r   r   rV   rT   r	   r   rK   rU   r   r   )r]   r   dfgr    r!   r"   r"   r#   -test_series_groupby_value_counts_with_groupery   s   	
rq   r   AB)rr   rs   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rS   r   )dtyper   rJ   )r   r   r   r	   ru   r   rN   lenr   r   r   r   r   rp   r    r!   r"   r"   r#   &test_series_groupby_value_counts_empty   s   
rx   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rS   )r   rM   rv   r   r   r   r   rw   r"   r"   r#   (test_series_groupby_value_counts_one_row   s
   rz   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r}   orderedru   r   ry   r   r   )r	   r   r   r   r   rN   r3   arrayr   r   r   )rg   r    r!   r"   r"   r#   /test_series_groupby_value_counts_on_categorical   s   r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrR   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r&   r   r&   r   r   r   levelscodesrK   r   r   r   r&   r   r   r   )r   r   r   r   r	   r   r   )r   gbr    r   r!   r"   r"   r#   (test_series_groupby_value_counts_no_sort   s   r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r"   r"   r"   r#   education_df   s   r   c                 C   s|   d}t jt|d | jddd}W d    n1 sw   Y  tjtdd |  W d    d S 1 s7w   Y  d S )Nz+DataFrame.groupby with axis=1 is deprecatedmatchr   r   axisr   )r   Zassert_produces_warningFutureWarningr   pytestraisesNotImplementedErrorr   )r   msggpr"   r"   r#   	test_axis   s   
"r   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   )r   r   r"   r"   r#   test_bad_subset   s   
"r   c                 C   sx   t tjt dkr|jtjjddd | dddg j	dd	}t
g d
tjg dg dddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrP   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   rF   r   )r   r3   __version__node
add_markerr   markxfailr   r   r	   r   from_tuplesr   r   )r   requestr    r!   r"   r"   r#   
test_basic   s&   	
r   c                 C   s   | | j |||dS )NrP   rG   rH   )r   )r   rW   rP   rG   rH   r"   r"   r#   _frame_value_counts  s   r   r   columnr   functionzsort, ascending))FN)TTr?   as_indexr=   c	                    sF  t tjt dkr|r|r|r|jtjjddd d d j fddd| }	 j	|	|d	}
|
d
dg j
|||d}|r|
td
dg|||}|rTt|| d S |rXdnd}| jd|idd}|dkr~|jddidd}t|d dd|d< n|dkr|d dk|d< nt|d dd|d< t|| d S  d
 d  d   d< |
d j
|||d}||_|r|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_t|| d S |dd
|d jdjd |dd|d jdjd |d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r"   )xr   r"   r#   <lambda>A      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   rF   r   r   r   r   r   level_0r   r   r   rC   Zbothr   r&   )r   r3   r   r   r   r   r   r   valuesr   r   rT   r   r   r   reset_indexrU   whereassert_frame_equalr   r   Zto_framestrsplitgetr   r   insert)r   r   rP   r   rG   rH   r   r=   r   r   r   r    r!   Zindex_framer"   r   r#   $test_against_frame_and_seriesgroupby  sb   

""r   rP   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   r&   r-      r   )r   r-   r   r-   r   )r   r-   r   r&   r   )r   r&   r   r   r   )r   r-   r-   r   r   )r   r   r-   r&   r   )r   r   r&   r   r   c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q|r7||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)r   rG   r   r   r   c                       g | ]}  | qS r"   r"   .0rowr   r   r"   r#   
<listcomp>      z!test_compound.<locals>.<listcomp>rF   r   )r   r   r   r   r   )
r   rP   rG   rH   expected_rowsZexpected_countZexpected_group_sizer   r    r!   r"   r   r#   test_compoundq  s   r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )r&   r   r      )r&   r   r   r   rn   Znum_legsZ	num_wings)Zfalcondogcatantr   r   r"   r"   r"   r#   
animals_df  s   r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   r&   r   )r   r   r   )r&   r   r   r&   r   r   r   r   r&   )r   )r&   r   r   r   )r&   r   r   )r   )r   r&   r   )r   r&   r   rF   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rG   rH   rP   r   rJ   r   rn   )r   r	   r   rN   r   r   r   )
r   rG   rH   rP   r   expected_dataexpected_indexresult_framer!   result_frame_groupbyr"   r"   r#   test_data_frame_value_counts  s   
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r-   r&   rA   r1   r/   )rr   rs   rt   D)r3   r7   r   )r:   r"   r"   r#   nulls_df  s   r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r-   rA   r/   r   r1   r&   r   )	r   r         ?r   r   r   r   r   r   )r   r   r-   rA   r&   r   )r   r   r   r   r   r   )r   r   rA   r/   r   r1   )r   r   r   r   r   r   )r   r   rA   )r   r   r   c                    s   t tjt dkr|s|jtjjddd jddg|d}|j	dd|d	}t
 }jD ]  fd
d|D | < q.t|}	t||	dd}
t||
 d S )Nr   r   Fr   rr   rs   )rI   T)rP   rG   rI   c                    r   r"   r"   r   r   r   r"   r#   r     r   z,test_dropna_combinations.<locals>.<listcomp>rF   r   )r   r3   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   )r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r    r   r   r!   r"   r   r#   test_dropna_combinations  s   	

r   c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner   BethSmithLouisern   Z
first_nameZmiddle_namer   )Znulls_fixturer"   r"   r#   names_with_nulls_df  s   
r   z%dropna, expected_data, expected_index)r   r   )r   r   )r   r   r   rJ   r   )r   r   r   r   r   )r   r   r   r   )r   r   r&   r&   )r&   r   r   r&   r   c           	      C   s`   | j ||d}t|||d}|r|tt| }t|| | dj ||d}t|| d S )N)rI   rP   r   rn   )r   r	   r+   rv   r   r   r   )	r   rI   rP   r   r   r   r   r!   r   r"   r"   r#   #test_data_frame_value_counts_dropna   s   !
r   observedznormalize, name, expected_data)r&   r   r   r   r   r   r   r   r   r   r   r   ru   )r   r   r           r   r   r   r   r   r   r   r   c                 C   s   t tjt dkr|jtjjddd | dj	d||d}|j
|d}tjg d	g d
d}	t||	|d}
tdD ]}|
jjt|
jj| |d|
_q<|rXt||
 d S |
j|r^dndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r   r-   levelrF   r   r   )r   r3   r   r   r   r   r   r   r   r   r   r   r   r	   rM   r   
set_levelsr   r   r   r   r   r   r   r   r   rP   r   r   r   r   r    r   expected_seriesir!   r"   r"   r#   =test_categorical_single_grouper_with_only_observed_categories3  s<   



r  c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ] }
t
|	jj|
 }|
d
krI|| d jj}|	jj||
d|	_q2|r]t||	 d S |	j|d}t|| d S )Nr   r   ASIAr   r   r   rJ   r   r-   r   r  r  )copyr   r   Zadd_categoriesr   r   r	   r   r   rM   r   r   r   Zset_categoriesr}   r  r   r   r   r   )r   r   r   r   rP   r   r   r   r    r	  r
  Zindex_levelr!   r"   r"   r#   !assert_categorical_single_grouper~  s.   
r  c              	   C   sL   t tjt dkr|jtjjddd g d}t| |d||||d d S )Nr   r   Fr   r   Tr   r   r   r   rP   r   r   	r   r3   r   r   r   r   r   r   r  r   r   rP   r   r   r   r   r"   r"   r#   -test_categorical_single_grouper_observed_true  s"   

r  )r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c              	   C   sL   t tjt dkr|jtjjddd g d}t| |d||||d d S )Nr   r   Fr   )r   r   r   r   r   r   r   r   r  r  r  r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   r  r  r  r"   r"   r#   .test_categorical_single_grouper_observed_false  s"   ,

r  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r  r  r  r  r  )r   r   r&   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g dd|d	}	td
D ]}
|	jj	t
|	jj|
 |
d|	_q@|r\t||	 d S |	j|rbdndd}t|| d S )Nr   r   r   r   r   r   )r   r   r   rJ   r   r&   r  rF   r   r  )r  r   r   r   r	   r   r   rM   r   r  r   r   r   r   r   r   )r   r   r   r   rP   r   r   r   r    r	  r
  r!   r"   r"   r#   "test_categorical_multiple_groupers2  s2   7


r  c                 C   s   t tjt dkr|jtjjddd |  } | d 	d| d< | d 	d| d< | j
d||d	}|j|d
}g d}	t|tj|	g dd|d}
tddD ]}|
jjt|
jj| |d|
_qP|rlt||
 d S |
j|rrdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   rJ   r   r   r-   r  rF   r   r  )r   r3   r   r   r   r   r   r   r  r   r   r   r	   r   r   rM   r   r  r   r   r   r   r   r   r  r"   r"   r#   test_categorical_non_groupers  s>   


r  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtjddg ddg ddg d||i}t|| d S )Nr   r   r&   r-   )rr   rs   )r   rA   r   rr   c                 S   s   | dkrdS dS )Nr   r/   r1   r"   )r
  r"   r"   r#   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   T)rG   rP   r   )r   r   rA   r   r   Zlevel_2)r1   r1   r/   rs   )r   r-   r&   )r   r   r   r3   r   int_r   r   )rP   expected_labelr   r   r   r    r!   r"   r"   r#   test_mixed_groupings  s   		r  ztest, columns, expected_namesrepeatZabbde)r{   Ndr|   r|   er  r'   level_1)r{   Nr!  r|   cr#  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|r<tdtj||ddd}t	
|| d S dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r-   rA   r/   r2   )r&   r   r   r1   r%   r   )r   r   r/   r-   rA   r2   )r&   r   r1   r   r   r%   r{   r   r   r   r!  r  r   rJ   r   r   c                 S   s   g | ]	}t |d g qS )r   )r6   r   r"   r"   r#   r     s    z0test_column_label_duplicates.<locals>.<listcomp>r#  )r   r3   r   int64r   r   r	   r   r   r   r   r6   appendr   )
testr   Zexpected_namesr   r   r   rW   r    r!   Zexpected_columnsr"   r"   r#   test_column_label_duplicates  s(   
r(  znormalize, expected_labelc                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr  r{   r|   r   Fr  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rP   r  r   r   r"   r"   r#   test_result_label_duplicates  s   	"r)  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr{   r   r   r&   rJ   r   r   )r   r   r3   r   r%  r   r	   r   r   r   r   )r   r   r    r!   r"   r"   r#   test_ambiguous_grouping!  s   r*  c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nr{   r|   r$  r   yr-  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r/  r   r   r   r   r   r   r   r   r   r"   r"   r#   "test_subset_overlaps_gb_key_raises,  
   "r4  c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )Nr+  r,  r.  r1  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r/  c3r   r2  r3  r"   r"   r#   !test_subset_doesnt_exist_in_frame4  r5  r7  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )Nr+  r,  r.  r1  r   r   r  r0  r   r   r&   r   r-  rJ   r   r   r   r   r   r	   r   rN   r   r   r   r    r!   r"   r"   r#   test_subset<  s   r:  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)r{   r   r   )r|   r-  r-  r1  )r/  r0  r0  )r   r   r   r  r0  r   r   r&   r   r-  )Nr0  r0  rJ   r   r   r8  r9  r"   r"   r#   test_subset_duplicate_columnsH  s   r;  c                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}| }tg d| d}|d  }t||g dgg dtdg dgg dd}t	d|dd}t
|| d S )Nr^   r_   rd   r-   re   rg   rh   rj   rk   rl   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r]   )r`   ra   rb   rc   )r   r   r   r&   r&   r-   r   )r   r   r   r&   r&   r-   )rj   re   rf   r   r   r   r   )r   ro   r   r   r   r   uniquer   rM   r	   r   r   )r]   r   r   r    datesZ
timestampsr   r!   r"   r"   r#   test_value_counts_time_grouperZ  s*   	r>  )O__doc__	itertoolsr   numpyr3   r   Zpandasr   r   r   r   r   r   r	   r
   r   Zpandas._testingZ_testingr   Zpandas.util.versionr   r$   r>   ZbinnedrD   r9   r:   r;   r   ZarangemaxrQ   rW   kr|   r&  r   ZslowZparametrizer\   rq   rx   rz   r   r   Zfixturer   r   r   r   r   r   r   r   r   r   r   r   rN   r7   r   r   r%  r  r  r  r  r  r  r  r6   r(  r)  r*  r4  r7  r:  r;  r>  r"   r"   r"   r#   <module>   s   ,$	




 H





	
:#*
&0 %<


