🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

"Split 2 columns (one fixed, one variable) into seperate datasets"

felix_wfelix_w Member Posts: 61 Contributor II
edited June 8 in Help

Dear Rapidminer Community, 

 

I have a problem I hope you can help me with. I am trying to split an excel file with ~500 columns into seperate data files. The design I need to have is that I would like to fix the first column (Column A is the date) and combine it with the next column (B) (attribute) and save this dataset. Then again (Column A + Column C), (Column A + Column D) etc. until the last column. All of these combinations should be saved seperately. 

 

I tried to handle this with "loop attribute" and "select attributes" in the loop but the problem is that my construct is stuck at the first attribute and doesnt go further then Column A + B. I wasn't able to make the loop work to go A + C, A + D, etc. The "select attribute" operator seems not to be dynamic, I cant use any macro with it? Is there another operator which I could use?

 

Does anybody know how I can solve this problem? Is there any other way than my loop?

 

This is what I have done so far:

<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve EEX Data" width="90" x="112" y="34">
<parameter key="repository_entry" value="../Data/EEX Data"/>
</operator>
<operator activated="true" class="concurrency:loop_attributes" compatibility="7.6.001" expanded="true" height="82" name="Loop Attributes" width="90" x="246" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="F1BQ012018|F1BM052017"/>
<parameter key="regular_expression" value="[a-zA-Z0-9\s]"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="attribute_name_macro" value="loop_attribute"/>
<parameter key="reuse_results" value="false"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="multiply" compatibility="7.6.001" expanded="true" height="82" name="Multiply" width="90" x="45" y="34"/>
<operator activated="true" class="select_attributes" compatibility="7.6.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="F1BQ012018"/>
<parameter key="attributes" value="Date|F1BQ012018"/>
<parameter key="regular_expression" value="F1BM"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="rename" compatibility="7.6.001" expanded="true" height="82" name="Rename" width="90" x="313" y="34">
<parameter key="old_name" value="F1BQ012018"/>
<parameter key="new_name" value="Preis"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
<list key="function_descriptions">
<parameter key="Produkt" value="%{loop_attribute}"/>
</list>
<parameter key="keep_all" value="true"/>
</operator>
<operator activated="true" class="store" compatibility="7.6.001" expanded="true" height="68" name="Store" width="90" x="581" y="34">
<parameter key="repository_entry" value="../Data/Files von Attribut Schleife/%{loop_attribute}"/>
</operator>
<connect from_port="input 1" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Store" to_port="input"/>
<connect from_op="Store" from_port="through" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve EEX Data" from_port="output" to_op="Loop Attributes" to_port="input 1"/>
<connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Tagged:

Best Answer

  • mschmitzmschmitz Posts: 2,155  RM Data Scientist
    Solution Accepted

    Hi,

    the loop is the way to go. And I think you are nearly done. You just need to use the %{loop_attribute} in Select attributes as well. I think my attached process should do it.

     

    Cheers,

    Martin

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve EEX Data" width="90" x="112" y="34">
    <parameter key="repository_entry" value="../Data/EEX Data"/>
    </operator>
    <operator activated="true" class="concurrency:loop_attributes" compatibility="7.6.001" expanded="true" height="82" name="Loop Attributes" width="90" x="246" y="34">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="F1BQ012018|F1BM052017"/>
    <parameter key="regular_expression" value="[a-zA-Z0-9\s]"/>
    <process expanded="true">
    <operator activated="true" class="select_attributes" compatibility="7.6.001" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="34">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attribute" value="F1BQ012018"/>
    <parameter key="attributes" value="Date|%{loop_attribute}"/>
    <parameter key="regular_expression" value="F1BM"/>
    </operator>
    <operator activated="true" class="rename" compatibility="7.6.001" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
    <parameter key="old_name" value="%{loop_attribute}"/>
    <parameter key="new_name" value="Preis"/>
    <list key="rename_additional_attributes"/>
    </operator>
    <operator activated="false" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="85">
    <list key="function_descriptions">
    <parameter key="Produkt" value="%{loop_attribute}"/>
    </list>
    <description align="center" color="transparent" colored="false" width="126">Not sure what this should do..</description>
    </operator>
    <operator activated="true" class="store" compatibility="7.6.001" expanded="true" height="68" name="Store" width="90" x="581" y="34">
    <parameter key="repository_entry" value="../Data/Files von Attribut Schleife/%{loop_attribute}"/>
    </operator>
    <connect from_port="input 1" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Store" to_port="input"/>
    <connect from_op="Store" from_port="through" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
    </process>
    </operator>
    <connect from_op="Retrieve EEX Data" from_port="output" to_op="Loop Attributes" to_port="input 1"/>
    <connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>
    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany

Answers

  • felix_wfelix_w Member Posts: 61 Contributor II

    This is exactly what I needed! Thank you so much! :D 

    sgenzer
Sign In or Register to comment.