Options

Applying T-Test on a collection of performance vectors

suleymansahalsuleymansahal Member Posts: 27 Contributor II
edited November 2018 in Help

Hi. I have a collection of performance vectors. How can I apply a pairwise t-test to them? I cannot select the collection items simultaneously and feed them to the T-Test operator. I tried a couple of things and searched the forum, but I did not find anything useful and could not figure it out. Thanks.

Tagged:

Best Answer

  • Options
    IngoRMIngoRM Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Community Manager, RMResearcher, Member, University Professor Posts: 1,751 RM Founder
    Solution Accepted

    Hi,

     

    The T-Test operator indeed only accepts single performance vectors, not collections.  If you know in advance how many performance vectors you will have, you can use the operator "Select" to pick them one by one and feed them into the T-Test operator (see process below).  This is of course only doable for a small number of performance vectors.

     

    Hope this helps,

    Ingo

     

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="7.3.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Root">
        <process expanded="true">
          <!-- Produces three performance vectors from the same generated data:
               out 1 = LibSVM, out 2 = linear regression, out 3 = k-NN. -->
          <operator activated="true" class="subprocess" compatibility="7.3.001" expanded="true" height="124" name="Subprocess" width="90" x="45" y="136">
            <process expanded="true">
              <operator activated="true" class="generate_data" compatibility="7.1.001" expanded="true" height="68" name="Generate Data" width="90" x="45" y="120">
                <parameter key="target_function" value="one variable non linear"/>
                <parameter key="number_examples" value="80"/>
                <parameter key="number_of_attributes" value="1"/>
                <parameter key="attributes_lower_bound" value="-40.0"/>
                <parameter key="attributes_upper_bound" value="30.0"/>
              </operator>
              <!-- One copy of the example set per cross validation. -->
              <operator activated="true" class="multiply" compatibility="7.3.001" expanded="true" height="124" name="Multiply" width="90" x="179" y="120"/>
              <!-- Cross validation of a k-NN learner (k = 5), scored by absolute error. -->
              <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation (3)" width="90" x="380" y="391">
                <parameter key="sampling_type" value="shuffled sampling"/>
                <process expanded="true">
                  <operator activated="true" class="k_nn" compatibility="7.3.001" expanded="true" height="82" name="k-NN" width="90" x="45" y="34">
                    <parameter key="k" value="5"/>
                  </operator>
                  <connect from_port="training set" to_op="k-NN" to_port="training set"/>
                  <connect from_op="k-NN" from_port="model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance_regression" compatibility="7.3.001" expanded="true" height="82" name=" Performance (2)" width="90" x="179" y="34">
                    <parameter key="root_mean_squared_error" value="false"/>
                    <parameter key="absolute_error" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
                  <connect from_op="Apply Model (2)" from_port="labelled data" to_op=" Performance (2)" to_port="labelled data"/>
                  <connect from_op=" Performance (2)" from_port="performance" to_port="performance 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_test set results" spacing="0"/>
                  <portSpacing port="sink_performance 1" spacing="0"/>
                  <portSpacing port="sink_performance 2" spacing="0"/>
                </process>
              </operator>
              <!-- Cross validation of a LibSVM learner (nu-SVR), scored by absolute error. -->
              <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="380" y="34">
                <parameter key="sampling_type" value="shuffled sampling"/>
                <process expanded="true">
                  <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.3.001" expanded="true" height="82" name="Support Vector Machine (LibSVM)" width="90" x="45" y="34">
                    <parameter key="svm_type" value="nu-SVR"/>
                    <parameter key="C" value="10000.0"/>
                    <list key="class_weights"/>
                  </operator>
                  <connect from_port="training set" to_op="Support Vector Machine (LibSVM)" to_port="training set"/>
                  <connect from_op="Support Vector Machine (LibSVM)" from_port="model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance_regression" compatibility="7.3.001" expanded="true" height="82" name=" Performance (Regression)" width="90" x="190" y="34">
                    <parameter key="root_mean_squared_error" value="false"/>
                    <parameter key="absolute_error" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op=" Performance (Regression)" to_port="labelled data"/>
                  <connect from_op=" Performance (Regression)" from_port="performance" to_port="performance 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_test set results" spacing="0"/>
                  <portSpacing port="sink_performance 1" spacing="0"/>
                  <portSpacing port="sink_performance 2" spacing="0"/>
                </process>
              </operator>
              <!-- Cross validation of a linear regression learner, scored by absolute error.
                   NOTE: the inner operator names differ from those above only by surrounding
                   spaces; the connect elements rely on that exact spelling. -->
              <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation (2)" width="90" x="380" y="187">
                <parameter key="sampling_type" value="shuffled sampling"/>
                <process expanded="true">
                  <operator activated="true" class="linear_regression" compatibility="7.3.001" expanded="true" height="103" name="Linear Regression" width="90" x="45" y="34"/>
                  <connect from_port="training set" to_op="Linear Regression" to_port="training set"/>
                  <connect from_op="Linear Regression" from_port="model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model " width="90" x="45" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance_regression" compatibility="7.3.001" expanded="true" height="82" name=" Performance (Regression) " width="90" x="179" y="34">
                    <parameter key="root_mean_squared_error" value="false"/>
                    <parameter key="absolute_error" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model " to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model " to_port="unlabelled data"/>
                  <connect from_op="Apply Model " from_port="labelled data" to_op=" Performance (Regression) " to_port="labelled data"/>
                  <connect from_op=" Performance (Regression) " from_port="performance" to_port="performance 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_test set results" spacing="0"/>
                  <portSpacing port="sink_performance 1" spacing="0"/>
                  <portSpacing port="sink_performance 2" spacing="0"/>
                </process>
              </operator>
              <connect from_op="Generate Data" from_port="output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Cross Validation" to_port="example set"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Cross Validation (2)" to_port="example set"/>
              <connect from_op="Multiply" from_port="output 3" to_op="Cross Validation (3)" to_port="example set"/>
              <connect from_op="Cross Validation (3)" from_port="performance 1" to_port="out 3"/>
              <connect from_op="Cross Validation" from_port="performance 1" to_port="out 1"/>
              <connect from_op="Cross Validation (2)" from_port="performance 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="36"/>
              <portSpacing port="sink_out 2" spacing="162"/>
              <portSpacing port="sink_out 3" spacing="0"/>
              <portSpacing port="sink_out 4" spacing="0"/>
            </process>
          </operator>
          <!-- Bundle the three performance vectors into a single collection. -->
          <operator activated="true" class="collect" compatibility="7.3.001" expanded="true" height="124" name="Collect" width="90" x="179" y="136"/>
          <!-- Copy the collection once per entry, then pick the entries one by one.
               "Select" sets no index parameter (presumably defaulting to the first
               entry; confirm against the operator documentation); the other two
               use index 2 and 3. -->
          <operator activated="true" class="multiply" compatibility="7.3.001" expanded="true" height="124" name="Multiply (2)" width="90" x="313" y="136"/>
          <operator activated="true" class="select" compatibility="7.3.001" expanded="true" height="68" name="Select (3)" width="90" x="447" y="238">
            <parameter key="index" value="3"/>
          </operator>
          <operator activated="true" class="select" compatibility="7.3.001" expanded="true" height="68" name="Select" width="90" x="447" y="34"/>
          <operator activated="true" class="select" compatibility="7.3.001" expanded="true" height="68" name="Select (2)" width="90" x="447" y="136">
            <parameter key="index" value="2"/>
          </operator>
          <!-- The T-Test receives the individually selected performance vectors. -->
          <operator activated="true" class="t_test" compatibility="7.3.001" expanded="true" height="145" name="T-Test" width="90" x="581" y="136"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Collect" to_port="input 1"/>
          <connect from_op="Subprocess" from_port="out 2" to_op="Collect" to_port="input 2"/>
          <connect from_op="Subprocess" from_port="out 3" to_op="Collect" to_port="input 3"/>
          <connect from_op="Collect" from_port="collection" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="Select" to_port="collection"/>
          <connect from_op="Multiply (2)" from_port="output 2" to_op="Select (2)" to_port="collection"/>
          <connect from_op="Multiply (2)" from_port="output 3" to_op="Select (3)" to_port="collection"/>
          <connect from_op="Select (3)" from_port="selected" to_op="T-Test" to_port="performance 3"/>
          <connect from_op="Select" from_port="selected" to_op="T-Test" to_port="performance 1"/>
          <connect from_op="Select (2)" from_port="selected" to_op="T-Test" to_port="performance 2"/>
          <connect from_op="T-Test" from_port="significance" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="105"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

Answers

  • Options
    suleymansahalsuleymansahal Member Posts: 27 Contributor II

    Thank you so much. I was more inclined towards solutions with a loop, and that was more confusing. There are about 30 vectors, but it seems doable :) I am thinking of using one Generate Macro operator (to automate the index values) together with a Select operator, wrapped in a subprocess, and copying it 30 times. Thank you again.

Sign In or Register to comment.