🎉 🎉   RAPIDMINER 9.5 BETA IS OUT!!!   🎉 🎉

GRAB THE HOTTEST NEW BETA OF RAPIDMINER STUDIO, SERVER, AND RADOOP. LET US KNOW WHAT YOU THINK!

CLICK HERE TO DOWNLOAD

🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

Stacking IOObject output for different models in a simple csv table

sebastian_gonza RapidMiner Certified Analyst, Member Posts: 52  Guru
edited December 2018 in Help

Hello

 

Can someone tell me if it is possible to stack the output of different individual models from a collection of IOObjects into one simple CSV table? I tried writing to CSV and to Excel after the collection operator, but it did not work.

 

Thanks

Tagged:

Answers

  • rfuentealba Moderator, RapidMiner Certified Analyst, Member, University Professor Posts: 417   Unicorn

    Hi @sebastian_gonza,

     

    Can you post your XML process? Yes, it should be doable. Let's see what you've got.

     

    All the best,

     

    Rodrigo.

  • sebastian_gonza RapidMiner Certified Analyst, Member Posts: 52  Guru
    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process as posted by the thread author.
      Reads a CSV, derives log-transformed attributes, groups the rows by
      IdCliente, trains and applies a linear regression for every group, and
      gathers the loop outputs with Collect.
    -->
    <process version="9.0.002">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="subprocess" compatibility="9.0.002" expanded="true" height="82" name="Modelos" width="90" x="179" y="136">
            <process expanded="true">
              <!-- Data preparation: load the sample file and keep the five attributes used below. -->
              <operator activated="true" class="read_csv" compatibility="9.0.002" expanded="true" height="68" name="Read CSV" width="90" x="246" y="34">
                <parameter key="csv_file" value="C:\Users\ANGLOBAL\Desktop\sample.csv"/>
                <list key="annotations"/>
                <list key="data_set_meta_data_information"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="9.0.002" expanded="true" height="103" name="Multiply (2)" width="90" x="447" y="34"/>
              <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true" height="82" name="Select Attributes (5)" width="90" x="581" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Cantidad|IdCliente|IdPedido|Importe|PrecioFinal"/>
              </operator>
              <!-- log(x+1) versions of Importe, PrecioFinal and Cantidad. -->
              <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="715" y="34">
                <list key="function_descriptions">
                  <parameter key="Logimporte" value="log(Importe+1)"/>
                  <parameter key="Logprecio" value="log([PrecioFinal]+1)"/>
                  <parameter key="Logcantidades" value="log(Cantidad+1)"/>
                </list>
              </operator>
              <!-- Logimporte is the regression target. -->
              <operator activated="true" class="set_role" compatibility="9.0.002" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
                <parameter key="attribute_name" value="Logimporte"/>
                <parameter key="target_role" value="label"/>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="remove_duplicates" compatibility="9.0.002" expanded="true" height="103" name="Remove Duplicates" width="90" x="983" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="IdCliente|IdPedido|Importe|Cantidad"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="9.0.002" expanded="true" height="82" name="Multiply" width="90" x="1117" y="34"/>
              <!-- Turn the single table into a collection grouped by IdCliente. -->
              <operator activated="true" class="operator_toolbox:group_into_collection" compatibility="1.5.000" expanded="true" height="82" name="Group Into Collection" width="90" x="45" y="238">
                <parameter key="group_by_attribute" value="IdCliente"/>
              </operator>
              <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true" height="68" name="Annotate" width="90" x="179" y="238">
                <list key="annotations">
                  <parameter key="Comment" value="IdCliente"/>
                </list>
              </operator>
              <!-- Inner process: executed for the elements of the collection. -->
              <operator activated="true" class="loop_collection" compatibility="9.0.002" expanded="true" height="145" name="Loop Collection" width="90" x="313" y="238">
                <process expanded="true">
                  <!-- Store the IdCliente value of example 1 in the macro IdCliente. -->
                  <operator activated="true" class="extract_macro" compatibility="9.0.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="45" y="34">
                    <parameter key="macro" value="IdCliente"/>
                    <parameter key="macro_type" value="data_value"/>
                    <parameter key="attribute_name" value="IdCliente"/>
                    <parameter key="example_index" value="1"/>
                    <list key="additional_macros"/>
                  </operator>
                  <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="34">
                    <parameter key="attribute_filter_type" value="subset"/>
                    <parameter key="attributes" value="Logcantidades|Logimporte|Logprecio"/>
                  </operator>
                  <!-- 70/30 split: partition 1 trains the model, partition 2 is scored. -->
                  <operator activated="true" class="split_data" compatibility="9.0.002" expanded="true" height="103" name="Split Data" width="90" x="313" y="34">
                    <enumeration key="partitions">
                      <parameter key="ratio" value="0.7"/>
                      <parameter key="ratio" value="0.3"/>
                    </enumeration>
                  </operator>
                  <operator activated="true" class="linear_regression" compatibility="9.0.002" expanded="true" height="103" name="Linear Regression (2)" width="90" x="447" y="34">
                    <parameter key="eliminate_colinear_features" value="false"/>
                    <parameter key="use_bias" value="false"/>
                  </operator>
                  <!-- Tag the model with the current IdCliente macro value. -->
                  <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true" height="68" name="Annotate (2)" width="90" x="581" y="34">
                    <list key="annotations">
                      <parameter key="Comment" value="%{IdCliente}"/>
                    </list>
                  </operator>
                  <operator activated="true" class="apply_model" compatibility="9.0.002" expanded="true" height="82" name="Apply Model" width="90" x="715" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Tag the scored data with the current IdCliente macro value. -->
                  <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true" height="68" name="Annotate (3)" width="90" x="849" y="34">
                    <list key="annotations">
                      <parameter key="Comment" value="%{IdCliente}"/>
                    </list>
                  </operator>
                  <connect from_port="single" to_op="Extract Macro (2)" to_port="example set"/>
                  <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
                  <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Split Data" to_port="example set"/>
                  <connect from_op="Split Data" from_port="partition 1" to_op="Linear Regression (2)" to_port="training set"/>
                  <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Linear Regression (2)" from_port="model" to_op="Annotate (2)" to_port="input"/>
                  <connect from_op="Annotate (2)" from_port="output" to_op="Apply Model" to_port="model"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Annotate (3)" to_port="input"/>
                  <!-- Loop output 1 carries the scored data, loop output 2 carries the model. -->
                  <connect from_op="Apply Model" from_port="model" to_port="output 2"/>
                  <connect from_op="Annotate (3)" from_port="output" to_port="output 1"/>
                  <portSpacing port="source_single" spacing="0"/>
                  <portSpacing port="sink_output 1" spacing="0"/>
                  <portSpacing port="sink_output 2" spacing="0"/>
                  <portSpacing port="sink_output 3" spacing="0"/>
                  <portSpacing port="sink_output 4" spacing="0"/>
                  <portSpacing port="sink_output 5" spacing="0"/>
                </process>
              </operator>
              <!--
                All four loop outputs are wired into this single Collect, although the
                inner process only feeds outputs 1 (scored data) and 2 (model). The
                scored ExampleSets and the models therefore end up under the same Collect.
              -->
              <operator activated="true" class="collect" compatibility="9.0.002" expanded="true" height="145" name="Collect" width="90" x="514" y="223"/>
              <connect from_op="Read CSV" from_port="output" to_op="Multiply (2)" to_port="input"/>
              <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (5)" to_port="example set input"/>
              <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
              <connect from_op="Set Role" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
              <connect from_op="Remove Duplicates" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Group Into Collection" to_port="exa"/>
              <connect from_op="Group Into Collection" from_port="col" to_op="Annotate" to_port="input"/>
              <connect from_op="Annotate" from_port="output" to_op="Loop Collection" to_port="collection"/>
              <connect from_op="Loop Collection" from_port="output 1" to_op="Collect" to_port="input 1"/>
              <connect from_op="Loop Collection" from_port="output 2" to_op="Collect" to_port="input 2"/>
              <connect from_op="Loop Collection" from_port="output 3" to_op="Collect" to_port="input 3"/>
              <connect from_op="Loop Collection" from_port="output 4" to_op="Collect" to_port="input 4"/>
              <connect from_op="Collect" from_port="collection" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Modelos" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    Sure, I have attached a sample.

     

    Thanks

  • mschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 2,158  RM Data Scientist

    Hi,

    The key operator is Append. It can merge a collection of example sets into one, provided they all have the same schema.

     

    BR,

    Martin

    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
    sgenzer
  • sebastian_gonza RapidMiner Certified Analyst, Member Posts: 52  Guru

    It does not work at the end of the loop or after the collection. Where should the Append be placed?

     

    thanks

  • mschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 2,158  RM Data Scientist

    Hi,

    After the loop. You need to ensure that the data has the same schema, though.

     

    BR,

    Martin

    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
  • sebastian_gonza RapidMiner Certified Analyst, Member Posts: 52  Guru

    Sorry, I don't understand what you mean by "the same schema". If you are referring to the structure of each IOObject, it is a linear regression by Id; sometimes it is calculated, other times not. I get the error "The operator needs a rapidminer input type which is not provided".

     

     

  • sebastian_gonza RapidMiner Certified Analyst, Member Posts: 52  Guru

    Sorry, is it possible for you to have a look at the last reply please? thanks

  • sgenzer 12 Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,535  Community Manager

    Hi @sebastian_gonza - ok I see what's going on here. So it's not a good idea to mix your models with your ExampleSets. It's like apples and oranges, and hence RM has no idea what to do with that (and hence weird error message).

     

    I don't see more than one model in your XML so I'm not sure what you're trying to do there, but this may guide you in a good direction:

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process suggested in the reply.
      Same preparation and per-IdCliente regression loop as the process posted
      earlier in the thread, but only the scored data (loop output 1) is
      collected, and the collection is passed through Append before being returned.
    -->
    <process version="9.0.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.0.003" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="subprocess" compatibility="9.0.003" expanded="true" height="82" name="Modelos" width="90" x="45" y="85">
            <process expanded="true">
              <!-- Data preparation: load the sample file and keep the five attributes used below. -->
              <operator activated="true" class="read_csv" compatibility="9.0.003" expanded="true" height="68" name="Read CSV" width="90" x="246" y="34">
                <parameter key="csv_file" value="/Users/genzerconsulting/Desktop/sample.csv"/>
                <list key="annotations"/>
                <list key="data_set_meta_data_information"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="9.0.003" expanded="true" height="82" name="Multiply (2)" width="90" x="447" y="34"/>
              <operator activated="true" class="select_attributes" compatibility="9.0.003" expanded="true" height="82" name="Select Attributes (5)" width="90" x="581" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Cantidad|IdCliente|IdPedido|Importe|PrecioFinal"/>
              </operator>
              <!-- log(x+1) versions of Importe, PrecioFinal and Cantidad. -->
              <operator activated="true" class="generate_attributes" compatibility="9.0.003" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="715" y="34">
                <list key="function_descriptions">
                  <parameter key="Logimporte" value="log(Importe+1)"/>
                  <parameter key="Logprecio" value="log([PrecioFinal]+1)"/>
                  <parameter key="Logcantidades" value="log(Cantidad+1)"/>
                </list>
              </operator>
              <!-- Logimporte is the regression target. -->
              <operator activated="true" class="set_role" compatibility="9.0.003" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
                <parameter key="attribute_name" value="Logimporte"/>
                <parameter key="target_role" value="label"/>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="remove_duplicates" compatibility="9.0.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="983" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="IdCliente|IdPedido|Importe|Cantidad"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="9.0.003" expanded="true" height="82" name="Multiply" width="90" x="1117" y="34"/>
              <!-- Turn the single table into a collection grouped by IdCliente. -->
              <operator activated="true" class="operator_toolbox:group_into_collection" compatibility="1.5.000" expanded="true" height="82" name="Group Into Collection" width="90" x="45" y="238">
                <parameter key="group_by_attribute" value="IdCliente"/>
              </operator>
              <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true" height="68" name="Annotate" width="90" x="179" y="238">
                <list key="annotations">
                  <parameter key="Comment" value="IdCliente"/>
                </list>
              </operator>
              <!-- Inner process: executed for the elements of the collection. -->
              <operator activated="true" class="loop_collection" compatibility="9.0.003" expanded="true" height="103" name="Loop Collection" width="90" x="313" y="238">
                <process expanded="true">
                  <!-- Store the IdCliente value of example 1 in the macro IdCliente. -->
                  <operator activated="true" class="extract_macro" compatibility="9.0.003" expanded="true" height="68" name="Extract Macro (2)" width="90" x="45" y="34">
                    <parameter key="macro" value="IdCliente"/>
                    <parameter key="macro_type" value="data_value"/>
                    <parameter key="attribute_name" value="IdCliente"/>
                    <parameter key="example_index" value="1"/>
                    <list key="additional_macros"/>
                  </operator>
                  <operator activated="true" class="select_attributes" compatibility="9.0.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="34">
                    <parameter key="attribute_filter_type" value="subset"/>
                    <parameter key="attributes" value="Logcantidades|Logimporte|Logprecio"/>
                  </operator>
                  <!-- 70/30 split: partition 1 trains the model, partition 2 is scored. -->
                  <operator activated="true" class="split_data" compatibility="9.0.003" expanded="true" height="103" name="Split Data" width="90" x="313" y="238">
                    <enumeration key="partitions">
                      <parameter key="ratio" value="0.7"/>
                      <parameter key="ratio" value="0.3"/>
                    </enumeration>
                  </operator>
                  <operator activated="true" class="linear_regression" compatibility="9.0.003" expanded="true" height="103" name="Linear Regression (2)" width="90" x="447" y="34">
                    <parameter key="eliminate_colinear_features" value="false"/>
                    <parameter key="use_bias" value="false"/>
                  </operator>
                  <!-- Tag the model with the current IdCliente macro value. -->
                  <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true" height="68" name="Annotate (2)" width="90" x="581" y="34">
                    <list key="annotations">
                      <parameter key="Comment" value="%{IdCliente}"/>
                    </list>
                  </operator>
                  <operator activated="true" class="apply_model" compatibility="9.0.003" expanded="true" height="82" name="Apply Model" width="90" x="715" y="136">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Tag the scored data with the current IdCliente macro value. -->
                  <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true" height="68" name="Annotate (3)" width="90" x="849" y="34">
                    <list key="annotations">
                      <parameter key="Comment" value="%{IdCliente}"/>
                    </list>
                  </operator>
                  <connect from_port="single" to_op="Extract Macro (2)" to_port="example set"/>
                  <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
                  <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Split Data" to_port="example set"/>
                  <connect from_op="Split Data" from_port="partition 1" to_op="Linear Regression (2)" to_port="training set"/>
                  <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Linear Regression (2)" from_port="model" to_op="Annotate (2)" to_port="input"/>
                  <connect from_op="Annotate (2)" from_port="output" to_op="Apply Model" to_port="model"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Annotate (3)" to_port="input"/>
                  <!-- Loop output 2 still carries the model, but nothing outside the loop is connected to it. -->
                  <connect from_op="Apply Model" from_port="model" to_port="output 2"/>
                  <connect from_op="Annotate (3)" from_port="output" to_port="output 1"/>
                  <portSpacing port="source_single" spacing="0"/>
                  <portSpacing port="sink_output 1" spacing="0"/>
                  <portSpacing port="sink_output 2" spacing="189"/>
                  <portSpacing port="sink_output 3" spacing="0"/>
                </process>
              </operator>
              <!-- Only loop output 1 (the scored data) is collected; Append then produces the "merged set" returned by the subprocess. -->
              <operator activated="true" class="collect" compatibility="9.0.003" expanded="true" height="82" name="Collect" width="90" x="447" y="238"/>
              <operator activated="true" class="append" compatibility="9.0.003" expanded="true" height="82" name="Append" width="90" x="581" y="238"/>
              <connect from_op="Read CSV" from_port="output" to_op="Multiply (2)" to_port="input"/>
              <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (5)" to_port="example set input"/>
              <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
              <connect from_op="Set Role" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
              <connect from_op="Remove Duplicates" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Group Into Collection" to_port="exa"/>
              <connect from_op="Group Into Collection" from_port="col" to_op="Annotate" to_port="input"/>
              <connect from_op="Annotate" from_port="output" to_op="Loop Collection" to_port="collection"/>
              <connect from_op="Loop Collection" from_port="output 1" to_op="Collect" to_port="input 1"/>
              <connect from_op="Collect" from_port="collection" to_op="Append" to_port="example set 1"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="420"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Modelos" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    Scott

    ----------------------
    Don't forget to submit your great ideas for Wisdom 2020! Deadline is November 15.

    Wisdom 2020 – Call for Speakers Form 

Sign In or Register to comment.