Extract and save the items of each cluster in an Excel file

ahootanha · March 2018

Hello
I clustered my data with k-means.
I want to save the items of each cluster in an Excel file, but I do not know how to do it.
And
Is there a possibility of fuzzy clustering in the program?
Thanks

lionelderkrikor · March 2018

Hi @ahootanha,

To answer your first question, one method is to use the Filter Examples and Write Excel operators.

Here is an example of a process with 3 clusters:

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!--
    Example process: retrieves the Iris sample data, clusters it with k-means (k = 3),
    then uses one Filter Examples + Write Excel pair per cluster value so that each
    cluster's examples are written to a separate .xlsx file.
    The number of branches is hard-wired to three cluster values (cluster_0, cluster_1, cluster_2).
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input data: the Iris example set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.000" expanded="true" height="68" name="Retrieve Iris" width="90" x="112" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- k-means with k = 3; its "clustered set" output feeds the first filter below. -->
      <operator activated="true" class="concurrency:k_means" compatibility="8.1.000" expanded="true" height="82" name="Clustering" width="90" x="313" y="34">
        <parameter key="k" value="3"/>
      </operator>
      <!-- Branch 1: filter on cluster = cluster_0 (the "cluster" attribute is presumably the one added by Clustering; confirm in the results view). -->
      <operator activated="true" class="filter_examples" compatibility="8.1.000" expanded="true" height="103" name="Filter Examples" width="90" x="514" y="34">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="cluster.equals.cluster_0"/>
        </list>
      </operator>
      <!-- NOTE(review): absolute path from the author's machine; point excel_file at a writable location on your own system before running. -->
      <operator activated="true" class="write_excel" compatibility="8.1.000" expanded="true" height="82" name="Write Excel" width="90" x="715" y="34">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Extract_cluster\cluster_0.xlsx"/>
      </operator>
      <!-- Branch 2: filter on cluster = cluster_1. -->
      <operator activated="true" class="filter_examples" compatibility="8.1.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="514" y="187">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="cluster.equals.cluster_1"/>
        </list>
      </operator>
      <!-- Branch 3: filter on cluster = cluster_2. -->
      <operator activated="true" class="filter_examples" compatibility="8.1.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="514" y="340">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="cluster.equals.cluster_2"/>
        </list>
      </operator>
      <!-- Output files for branches 2 and 3; same path caveat as for the first Write Excel operator. -->
      <operator activated="true" class="write_excel" compatibility="8.1.000" expanded="true" height="82" name="Write Excel (2)" width="90" x="715" y="187">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Extract_cluster\cluster_1.xlsx"/>
      </operator>
      <operator activated="true" class="write_excel" compatibility="8.1.000" expanded="true" height="82" name="Write Excel (3)" width="90" x="715" y="340">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Extract_cluster\cluster_2.xlsx"/>
      </operator>
      <!--
        Wiring: each filter's "example set output" goes to its own Write Excel operator,
        while its "original" port is chained into the next filter's input, so the
        filters are daisy-chained from the single clustered set.
        Each Write Excel "through" port is connected to a process result port.
      -->
      <connect from_op="Retrieve Iris" from_port="output" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="clustered set" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Write Excel" to_port="input"/>
      <connect from_op="Filter Examples" from_port="original" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Write Excel" from_port="through" to_port="result 1"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Write Excel (2)" to_port="input"/>
      <connect from_op="Filter Examples (2)" from_port="original" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Write Excel (3)" to_port="input"/>
      <connect from_op="Write Excel (2)" from_port="through" to_port="result 2"/>
      <connect from_op="Write Excel (3)" from_port="through" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

To answer your second question, I don't know what "fuzzy clustering" is, but

if you don't know the number of clusters a priori, you can use the X-Means operator: this model determines

the correct number of clusters based on the Bayesian Information Criterion (BIC).

I hope it helps,

Regards,

Lionel

lionelderkrikor · March 2018

Hi again @ahootanha,

To better answer your first question, here is a more flexible process

using the Loop Values operator: it is independent of the number of clusters.

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!--
    Example process: retrieves the Iris sample data, clusters it with k-means (k = 3),
    then hands the clustered set to a Loop Values operator configured on the "cluster"
    attribute. The inner process filters on the current loop value and writes the
    result to an Excel file named after that value, so no per-cluster branch is hard-wired.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input data: the Iris example set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.000" expanded="true" height="68" name="Retrieve Iris" width="90" x="112" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- k-means with k = 3; its "clustered set" output feeds Loop Values. -->
      <operator activated="true" class="concurrency:k_means" compatibility="8.1.000" expanded="true" height="82" name="Clustering" width="90" x="246" y="34">
        <parameter key="k" value="3"/>
      </operator>
      <!-- Loops over the "cluster" attribute; the inner process refers to the current value as %{loop_value} (presumably the operator's default iteration macro; confirm in the operator parameters). -->
      <operator activated="true" class="concurrency:loop_values" compatibility="8.1.000" expanded="true" height="82" name="Loop Values" width="90" x="447" y="34">
        <parameter key="attribute" value="cluster"/>
        <process expanded="true">
          <!-- Filter on cluster = current loop value. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.000" expanded="true" height="103" name="Filter Examples" width="90" x="246" y="34">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="cluster.equals.%{loop_value}"/>
            </list>
          </operator>
          <!-- NOTE(review): absolute directory from the author's machine; change it to a writable location. The file name is taken from %{loop_value}. -->
          <operator activated="true" class="write_excel" compatibility="8.1.000" expanded="true" height="82" name="Write Excel" width="90" x="514" y="34">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Extract_cluster\%{loop_value}.xlsx"/>
          </operator>
          <!-- Inner wiring: loop input 1 goes to the filter, the filtered set to Write Excel, and its "through" port to loop output 1. -->
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Write Excel" to_port="input"/>
          <connect from_op="Write Excel" from_port="through" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Outer wiring: Retrieve Iris to Clustering, clustered set to Loop Values, loop output to the process result. -->
      <connect from_op="Retrieve Iris" from_port="output" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="clustered set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Regards,

Lionel

ahootanha · March 2018

Hello
thank you very much
But
I do not know where to use this code in my RapidMiner program.
Please guide
Send me a screenshot of the implementation of operators
Thanks

ahootanha · March 2018

look
The field to choose the name of the clusters is empty
Help me please

Thomas_Ott · March 2018

@ahootanha did you search for this problem on the threads? It's probably due to the meta-data not propagating correctly. You can type in the attribute name (case sensitive) and it will work.

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

Extract and save the items of each cluster in an Excel file

Answers