|
4831 | 4831 | </span> |
4832 | 4832 | </a> |
4833 | 4833 |
|
| 4834 | +</li> |
| 4835 | + |
| 4836 | + <li class="md-nav__item"> |
| 4837 | + <a href="#spotpython.utils.aggregate.select_best_cluster" class="md-nav__link"> |
| 4838 | + <span class="md-ellipsis"> |
| 4839 | + select_best_cluster |
| 4840 | + </span> |
| 4841 | + </a> |
| 4842 | + |
4834 | 4843 | </li> |
4835 | 4844 |
|
4836 | 4845 | <li class="md-nav__item"> |
|
5641 | 5650 | </span> |
5642 | 5651 | </a> |
5643 | 5652 |
|
| 5653 | +</li> |
| 5654 | + |
| 5655 | + <li class="md-nav__item"> |
| 5656 | + <a href="#spotpython.utils.aggregate.select_best_cluster" class="md-nav__link"> |
| 5657 | + <span class="md-ellipsis"> |
| 5658 | + select_best_cluster |
| 5659 | + </span> |
| 5660 | + </a> |
| 5661 | + |
5644 | 5662 | </li> |
5645 | 5663 |
|
5646 | 5664 | <li class="md-nav__item"> |
@@ -6691,6 +6709,204 @@ <h2 id="spotpython.utils.aggregate.get_ranks" class="doc doc-heading"> |
6691 | 6709 | <div class="doc doc-object doc-function"> |
6692 | 6710 |
|
6693 | 6711 |
|
| 6712 | +<h2 id="spotpython.utils.aggregate.select_best_cluster" class="doc doc-heading"> |
| 6713 | + <code class="highlight language-python"><span class="n">select_best_cluster</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></code> |
| 6714 | + |
| 6715 | +<a href="#spotpython.utils.aggregate.select_best_cluster" class="headerlink" title="Permanent link">¶</a></h2> |
| 6716 | + |
| 6717 | + |
| 6718 | + <div class="doc doc-contents "> |
| 6719 | + |
| 6720 | + <p>Selects all points from the cluster whose center has the smallest mean y value.</p> |
| 6721 | + |
| 6722 | + |
| 6723 | +<p><span class="doc-section-title">Parameters:</span></p> |
| 6724 | + <table> |
| 6725 | + <thead> |
| 6726 | + <tr> |
| 6727 | + <th>Name</th> |
| 6728 | + <th>Type</th> |
| 6729 | + <th>Description</th> |
| 6730 | + <th>Default</th> |
| 6731 | + </tr> |
| 6732 | + </thead> |
| 6733 | + <tbody> |
| 6734 | + <tr class="doc-section-item"> |
| 6735 | + <td> |
| 6736 | + <code>X</code> |
| 6737 | + </td> |
| 6738 | + <td> |
| 6739 | + <code><span title="numpy.ndarray">ndarray</span></code> |
| 6740 | + </td> |
| 6741 | + <td> |
| 6742 | + <div class="doc-md-description"> |
| 6743 | + <p>X array, shape <code>(n, k)</code>.</p> |
| 6744 | + </div> |
| 6745 | + </td> |
| 6746 | + <td> |
| 6747 | + <em>required</em> |
| 6748 | + </td> |
| 6749 | + </tr> |
| 6750 | + <tr class="doc-section-item"> |
| 6751 | + <td> |
| 6752 | + <code>y</code> |
| 6753 | + </td> |
| 6754 | + <td> |
| 6755 | + <code><span title="numpy.ndarray">ndarray</span></code> |
| 6756 | + </td> |
| 6757 | + <td> |
| 6758 | + <div class="doc-md-description"> |
| 6759 | + <p>values, shape <code>(n,)</code>.</p> |
| 6760 | + </div> |
| 6761 | + </td> |
| 6762 | + <td> |
| 6763 | + <em>required</em> |
| 6764 | + </td> |
| 6765 | + </tr> |
| 6766 | + <tr class="doc-section-item"> |
| 6767 | + <td> |
| 6768 | + <code>k</code> |
| 6769 | + </td> |
| 6770 | + <td> |
| 6771 | + <code><span title="int">int</span></code> |
| 6772 | + </td> |
| 6773 | + <td> |
| 6774 | + <div class="doc-md-description"> |
| 6775 | + <p>number of clusters.</p> |
| 6776 | + </div> |
| 6777 | + </td> |
| 6778 | + <td> |
| 6779 | + <em>required</em> |
| 6780 | + </td> |
| 6781 | + </tr> |
| 6782 | + </tbody> |
| 6783 | + </table> |
| 6784 | + |
| 6785 | + |
| 6786 | + <p><span class="doc-section-title">Returns:</span></p> |
| 6787 | + <table> |
| 6788 | + <thead> |
| 6789 | + <tr> |
| 6790 | + <th>Type</th> |
| 6791 | + <th>Description</th> |
| 6792 | + </tr> |
| 6793 | + </thead> |
| 6794 | + <tbody> |
| 6795 | + <tr class="doc-section-item"> |
| 6796 | + <td> |
| 6797 | + <code><span title="numpy.ndarray">ndarray</span></code> |
| 6798 | + </td> |
| 6799 | + <td> |
| 6800 | + <div class="doc-md-description"> |
| 6801 | + <p>selected <code>X</code> values from the best cluster, shape <code>(m, k)</code>.</p> |
| 6802 | + </div> |
| 6803 | + </td> |
| 6804 | + </tr> |
| 6805 | + <tr class="doc-section-item"> |
| 6806 | + <td> |
| 6807 | + <code><span title="numpy.ndarray">ndarray</span></code> |
| 6808 | + </td> |
| 6809 | + <td> |
| 6810 | + <div class="doc-md-description"> |
| 6811 | + <p>selected <code>y</code> values from the best cluster, shape <code>(m,)</code>.</p> |
| 6812 | + </div> |
| 6813 | + </td> |
| 6814 | + </tr> |
| 6815 | + </tbody> |
| 6816 | + </table> |
| 6817 | + |
| 6818 | + |
| 6819 | +<p><span class="doc-section-title">Examples:</span></p> |
| 6820 | + <div class="highlight"><pre><span></span><code><span class="gp">>>> </span><span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="p">[</span><span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">]])</span> |
| 6821 | +<span class="gp">>>> </span><span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span> |
| 6822 | +<span class="gp">>>> </span><span class="n">X_best</span><span class="p">,</span> <span class="n">y_best</span> <span class="o">=</span> <span class="n">select_best_cluster</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> |
| 6823 | +<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">X_best</span><span class="p">)</span> |
| 6824 | +<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">y_best</span><span class="p">)</span> |
| 6825 | +</code></pre></div> |
| 6826 | + |
| 6827 | + |
| 6828 | + <details class="quote"> |
| 6829 | + <summary>Source code in <code>spotpython/utils/aggregate.py</code></summary> |
| 6830 | + <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">301</span> |
| 6831 | +<span class="normal">302</span> |
| 6832 | +<span class="normal">303</span> |
| 6833 | +<span class="normal">304</span> |
| 6834 | +<span class="normal">305</span> |
| 6835 | +<span class="normal">306</span> |
| 6836 | +<span class="normal">307</span> |
| 6837 | +<span class="normal">308</span> |
| 6838 | +<span class="normal">309</span> |
| 6839 | +<span class="normal">310</span> |
| 6840 | +<span class="normal">311</span> |
| 6841 | +<span class="normal">312</span> |
| 6842 | +<span class="normal">313</span> |
| 6843 | +<span class="normal">314</span> |
| 6844 | +<span class="normal">315</span> |
| 6845 | +<span class="normal">316</span> |
| 6846 | +<span class="normal">317</span> |
| 6847 | +<span class="normal">318</span> |
| 6848 | +<span class="normal">319</span> |
| 6849 | +<span class="normal">320</span> |
| 6850 | +<span class="normal">321</span> |
| 6851 | +<span class="normal">322</span> |
| 6852 | +<span class="normal">323</span> |
| 6853 | +<span class="normal">324</span> |
| 6854 | +<span class="normal">325</span> |
| 6855 | +<span class="normal">326</span> |
| 6856 | +<span class="normal">327</span> |
| 6857 | +<span class="normal">328</span> |
| 6858 | +<span class="normal">329</span> |
| 6859 | +<span class="normal">330</span> |
| 6860 | +<span class="normal">331</span> |
| 6861 | +<span class="normal">332</span> |
| 6862 | +<span class="normal">333</span> |
| 6863 | +<span class="normal">334</span> |
| 6864 | +<span class="normal">335</span> |
| 6865 | +<span class="normal">336</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">select_best_cluster</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">k</span><span class="p">):</span> |
| 6866 | +<span class="w"> </span><span class="sd">"""</span> |
| 6867 | +<span class="sd"> Selects all points from the cluster whose center has the smallest mean y value.</span> |
| 6868 | + |
| 6869 | +<span class="sd"> Args:</span> |
| 6870 | +<span class="sd"> X (numpy.ndarray): X array, shape `(n, k)`.</span> |
| 6871 | +<span class="sd"> y (numpy.ndarray): values, shape `(n,)`.</span> |
| 6872 | +<span class="sd"> k (int): number of clusters.</span> |
| 6873 | + |
| 6874 | +<span class="sd"> Returns:</span> |
| 6875 | +<span class="sd"> (numpy.ndarray): selected `X` values from the best cluster, shape `(m, k)`.</span> |
| 6876 | +<span class="sd"> (numpy.ndarray): selected `y` values from the best cluster, shape `(m,)`.</span> |
| 6877 | + |
| 6878 | +<span class="sd"> Examples:</span> |
| 6879 | +<span class="sd"> >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])</span> |
| 6880 | +<span class="sd"> >>> y = np.array([5, 4, 3, 2, 1])</span> |
| 6881 | +<span class="sd"> >>> X_best, y_best = select_best_cluster(X, y, 2)</span> |
| 6882 | +<span class="sd"> >>> print(X_best)</span> |
| 6883 | +<span class="sd"> >>> print(y_best)</span> |
| 6884 | +<span class="sd"> """</span> |
| 6885 | + <span class="c1"># Perform k-means clustering</span> |
| 6886 | + <span class="n">kmeans</span> <span class="o">=</span> <span class="n">KMeans</span><span class="p">(</span><span class="n">n_clusters</span><span class="o">=</span><span class="n">k</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_init</span><span class="o">=</span><span class="s2">"auto"</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span> |
| 6887 | + <span class="n">labels</span> <span class="o">=</span> <span class="n">kmeans</span><span class="o">.</span><span class="n">labels_</span> |
| 6888 | + <span class="c1"># Compute mean y for each cluster</span> |
| 6889 | + <span class="n">cluster_means</span> <span class="o">=</span> <span class="p">[]</span> |
| 6890 | + <span class="k">for</span> <span class="n">cluster_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">k</span><span class="p">):</span> |
| 6891 | + <span class="n">cluster_y</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">labels</span> <span class="o">==</span> <span class="n">cluster_idx</span><span class="p">]</span> |
| 6892 | + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cluster_y</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> |
| 6893 | + <span class="n">cluster_means</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">inf</span><span class="p">)</span> |
| 6894 | + <span class="k">else</span><span class="p">:</span> |
| 6895 | + <span class="n">cluster_means</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">cluster_y</span><span class="p">))</span> |
| 6896 | + <span class="c1"># Find cluster with smallest mean y</span> |
| 6897 | + <span class="n">best_cluster</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmin</span><span class="p">(</span><span class="n">cluster_means</span><span class="p">)</span> |
| 6898 | + <span class="c1"># Select all points from the best cluster</span> |
| 6899 | + <span class="n">mask</span> <span class="o">=</span> <span class="n">labels</span> <span class="o">==</span> <span class="n">best_cluster</span> |
| 6900 | + <span class="k">return</span> <span class="n">X</span><span class="p">[</span><span class="n">mask</span><span class="p">],</span> <span class="n">y</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span> |
| 6901 | +</code></pre></div></td></tr></table></div> |
| 6902 | + </details> |
| 6903 | + </div> |
| 6904 | + |
| 6905 | +</div> |
| 6906 | + |
| 6907 | +<div class="doc doc-object doc-function"> |
| 6908 | + |
| 6909 | + |
6694 | 6910 | <h2 id="spotpython.utils.aggregate.select_distant_points" class="doc doc-heading"> |
6695 | 6911 | <code class="highlight language-python"><span class="n">select_distant_points</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></code> |
6696 | 6912 |
|
|
0 commit comments