Split attributes from one to many repositories

Marco_PMarco_P RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2 Contributor I
edited July 2020 in Help



I have a repository with over 20,000 attributes (different types etc.). I would like to split the repository into many, whereby each new repository should contain 600 attributes out of the 20,000. If I joined them again, I should get the original data set back.










Best Answer

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,517 RM Data Scientist
    Solution Accepted

    Hi Marco,


    That turned out to be trickier than I expected. Have a look at the attached process; I think this solves it.




    <?xml version="1.0" encoding="UTF-8"?>
    <process version="7.2.001">
      <!--
        Splits the attributes of an example set into consecutive chunks.
        The Loop runs 5 iterations; in each one Generate Macro sets
          min = (iteration - 1) * 5 + 1   and   max = iteration * 5,
        so every iteration addresses a block of 5 attributes. Change the factor 5
        in both macro expressions (and the number of iterations) to alter the
        chunk size. The Store operator is deactivated and is not connected.
      -->
      <operator activated="true" class="process" compatibility="7.2.001" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//Samples/data/Sonar"/>
          </operator>
          <operator activated="true" class="loop" compatibility="7.2.001" expanded="true" height="82" name="Loop" width="90" x="179" y="85">
            <parameter key="set_iteration_macro" value="true"/>
            <parameter key="iterations" value="5"/>
            <process expanded="true">
              <operator activated="true" class="generate_macro" compatibility="7.2.001" expanded="true" height="82" name="Generate Macro" width="90" x="45" y="136">
                <list key="function_descriptions">
                  <parameter key="max" value="(eval(%{iteration}))*5"/>
                  <parameter key="min" value="(eval(%{iteration})-1)*5+1"/>
                </list>
                <description align="center" color="transparent" colored="false" width="126">Change the #attributes here</description>
              </operator>
              <operator activated="true" class="multiply" compatibility="7.2.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="136"/>
              <!--
                Builds the weight vector for the current chunk: transpose the data,
                keep only the "id" attribute, keep rows %{min} to %{max}, transpose
                back and convert the result to attribute weights.
              -->
              <operator activated="true" class="subprocess" compatibility="7.2.001" expanded="true" height="103" name="Subprocess" width="90" x="313" y="187">
                <process expanded="true">
                  <operator activated="true" class="transpose" compatibility="7.2.001" expanded="true" height="82" name="Transpose" width="90" x="45" y="34"/>
                  <operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="id"/>
                  </operator>
                  <operator activated="true" class="filter_example_range" compatibility="7.2.001" expanded="true" height="82" name="Filter Example Range" width="90" x="313" y="34">
                    <parameter key="first_example" value="%{min}"/>
                    <parameter key="last_example" value="%{max}"/>
                  </operator>
                  <operator activated="true" class="transpose" compatibility="7.2.001" expanded="true" height="82" name="Transpose (2)" width="90" x="447" y="34"/>
                  <operator activated="true" class="data_to_weights" compatibility="7.2.001" expanded="true" height="82" name="Data to Weights" width="90" x="581" y="34"/>
                  <connect from_port="in 1" to_op="Transpose" to_port="example set input"/>
                  <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
                  <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
                  <connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose (2)" to_port="example set input"/>
                  <connect from_op="Transpose (2)" from_port="example set output" to_op="Data to Weights" to_port="example set"/>
                  <connect from_op="Data to Weights" from_port="weights" to_port="out 1"/>
                  <connect from_op="Data to Weights" from_port="example set" to_port="out 2"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                  <portSpacing port="sink_out 3" spacing="0"/>
                </process>
                <description align="center" color="transparent" colored="false" width="126">Get a good weight vector</description>
              </operator>
              <operator activated="true" class="select_by_weights" compatibility="7.2.001" expanded="true" height="103" name="Select by Weights" width="90" x="447" y="136"/>
              <operator activated="false" class="store" compatibility="7.2.001" expanded="true" height="68" name="Store" width="90" x="581" y="187"/>
              <connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
              <connect from_op="Generate Macro" from_port="through 1" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Select by Weights" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Subprocess" to_port="in 1"/>
              <connect from_op="Subprocess" from_port="out 1" to_op="Select by Weights" to_port="weights"/>
              <connect from_op="Select by Weights" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Retrieve Sonar" from_port="output" to_op="Loop" to_port="input 1"/>
          <connect from_op="Loop" from_port="output 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany


  • Options
    Marco_PMarco_P RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2 Contributor I

    Hey Martin,


    Thanks a lot. Very nice solution, and it works well.


    Best Regards





Sign In or Register to comment.