🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

Difference between local random seed and global random seed

christian1983christian1983 Member Posts: 11 Contributor II
Hello everybody,

While trying to understand RapidMiner 5.0 in detail, I wondered why the results of the following validation process differ from each other despite using the same local random seed. So my question is: what are the reasons for that? Maybe I'm not familiar enough with the topic of local and global random seeds, but as far as I know, when the same local random seed is used for each validation, the data for testing and training are split the same way, so no difference between the validation results should be expected.

Here is the process:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.0) from the original question.
  It retrieves the Iris sample data and, inside a Loop, runs one of two
  X-Validation subprocesses per iteration. Each validation trains a Neural Net,
  applies it to the test set and computes a Performance result.
  Both Validation operators set use_local_random_seed to true; the Neural Net
  operators set no parameters here apart from an empty hidden_layers list.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
    <process expanded="true" height="377" width="620">
      <!-- Load the Iris example set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="5.0.10" expanded="true" height="60" name="Retrieve" width="90" x="45" y="165">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Define the macro "max" with value 2; the Loop below reads it as its iteration count. -->
      <operator activated="true" class="set_macro" compatibility="5.0.10" expanded="true" height="76" name="Set Macro" width="90" x="45" y="255">
        <parameter key="macro" value="max"/>
        <parameter key="value" value="2"/>
      </operator>
      <!-- Run the inner process %{max} times. -->
      <operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="94" name="Loop" width="90" x="179" y="255">
        <parameter key="iterations" value="%{max}"/>
        <process expanded="true" height="391" width="638">
          <!-- Define the macro "current_iteration" from %{a}.
               NOTE(review): %{a} is presumably RapidMiner's predefined macro for how often
               the current operator has been applied; confirm against the macro documentation. -->
          <operator activated="true" class="generate_macro" compatibility="5.0.10" expanded="true" height="76" name="Generate Macro" width="90" x="40" y="70">
            <list key="function_descriptions">
              <parameter key="current_iteration" value="%{a}"/>
            </list>
          </operator>
          <!-- select_which is driven by %{current_iteration}, so which of the two
               subprocesses below runs depends on that macro's value. -->
          <operator activated="true" class="select_subprocess" compatibility="5.0.10" expanded="true" height="76" name="Select Subprocess" width="90" x="179" y="75">
            <parameter key="select_which" value="%{current_iteration}"/>
            <!-- First selectable subprocess: "Validation (3)". -->
            <process expanded="true" height="391" width="294">
              <!-- Local random seed is enabled, but no local_random_seed value is given here. -->
              <operator activated="true" class="x_validation" compatibility="5.0.10" expanded="true" height="112" name="Validation (3)" width="90" x="55" y="32">
                <parameter key="use_local_random_seed" value="true"/>
                <!-- Training subprocess: learn a Neural Net from the training set.
                     Neither shuffle nor use_local_random_seed is set on the learner here. -->
                <process expanded="true" height="391" width="294">
                  <operator activated="true" class="neural_net" compatibility="5.0.10" expanded="true" height="76" name="Neural Net (3)" width="90" x="56" y="48">
                    <list key="hidden_layers"/>
                  </operator>
                  <connect from_port="training" to_op="Neural Net (3)" to_port="training set"/>
                  <connect from_op="Neural Net (3)" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing subprocess: apply the model to the test set and measure performance. -->
                <process expanded="true" height="391" width="294">
                  <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (3)" width="90" x="69" y="51">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" compatibility="5.0.10" expanded="true" height="76" name="Performance (3)" width="90" x="168" y="165"/>
                  <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
                  <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
                  <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="input 1" to_op="Validation (3)" to_port="training"/>
              <connect from_op="Validation (3)" from_port="averagable 1" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
            <!-- Second selectable subprocess: "Validation (4)", configured with the same
                 parameters as "Validation (3)"; only names and layout coordinates differ. -->
            <process expanded="true" height="391" width="294">
              <operator activated="true" class="x_validation" compatibility="5.0.10" expanded="true" height="112" name="Validation (4)" width="90" x="45" y="75">
                <parameter key="use_local_random_seed" value="true"/>
                <!-- Training subprocess: learn a Neural Net from the training set. -->
                <process expanded="true" height="391" width="294">
                  <operator activated="true" class="neural_net" compatibility="5.0.10" expanded="true" height="76" name="Neural Net (4)" width="90" x="102" y="30">
                    <list key="hidden_layers"/>
                  </operator>
                  <connect from_port="training" to_op="Neural Net (4)" to_port="training set"/>
                  <connect from_op="Neural Net (4)" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing subprocess: apply the model to the test set and measure performance. -->
                <process expanded="true" height="391" width="294">
                  <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (4)" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" compatibility="5.0.10" expanded="true" height="76" name="Performance (4)" width="90" x="169" y="30"/>
                  <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
                  <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance (4)" to_port="labelled data"/>
                  <connect from_op="Performance (4)" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="input 1" to_op="Validation (4)" to_port="training"/>
              <connect from_op="Validation (4)" from_port="averagable 1" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Inner wiring: loop input -> Generate Macro -> Select Subprocess -> loop output. -->
          <connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
          <connect from_op="Generate Macro" from_port="through 1" to_op="Select Subprocess" to_port="input 1"/>
          <connect from_op="Select Subprocess" from_port="output 1" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="source_input 3" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: the Iris data feeds Loop input 1, Set Macro's through port
           feeds Loop input 2, and the Loop's first output is the process result. -->
      <connect from_op="Retrieve" from_port="output" to_op="Loop" to_port="input 1"/>
      <connect from_op="Set Macro" from_port="through 1" to_op="Loop" to_port="input 2"/>
      <connect from_op="Loop" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>


I hope you can help me, maybe someone can give me a brief explanation of the use of local and global random seed.

Thank you very much.

Answers

  • IngoRMIngoRM Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Community Manager, RMResearcher, Member, University Professor Posts: 1,661  RM Founder
    Hi,

    not sure if this helps, but the topic of local vs. global random seed has been covered here quite a few times already. Did you try a search first? Maybe this already helps...

    Cheers,
    Ingo
    RapidMiner Wisdom 2020
    February 11th and 12th 2020 in Boston, MA, USA

  • dan_agapedan_agape Member Posts: 106  Guru
    Hi

    Perhaps you already sorted out your question as it's been some time now, but if not, here is an answer. Indeed, data for testing and training are split the same way, but randomness is involved a second time in your process, namely when the model is built - see the shuffle parameter in the Neural Net operator box. You have no option to specify a local seed there, so you cannot control at all how data was shuffled before it feeds the neural net to be trained. Now, if you learn a neural net twice from the same dataset and with the same specified parameters, so that everything is the same apart from the order in which the data is fed (because the dataset can be reordered differently by shuffling in each case), you can get two distinct neural nets.

    So if you deselect shuffle, everything is the same in both cases, and you should get the same overall result now.

    Dan

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (version 5.0) posted with the answer.
      Same operator layout as the process in the question, but with parameters
      written out explicitly. Both Neural Net operators have shuffle set to false,
      and both Validation operators use local_random_seed 1992.
    -->
    <process version="5.0">
     <context>
       <input/>
       <output/>
       <macros/>
     </context>
     <operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
       <parameter key="logverbosity" value="init"/>
       <!-- Seed set on the root process operator. -->
       <parameter key="random_seed" value="2001"/>
       <parameter key="send_mail" value="never"/>
       <parameter key="process_duration_for_mail" value="30"/>
       <parameter key="encoding" value="SYSTEM"/>
       <parameter key="parallelize_main_process" value="false"/>
       <process expanded="true" height="377" width="620">
         <!-- Load the Iris example set from the Samples repository. -->
         <operator activated="true" class="retrieve" compatibility="5.0.10" expanded="true" height="60" name="Retrieve" width="90" x="45" y="165">
           <parameter key="repository_entry" value="//Samples/data/Iris"/>
         </operator>
         <!-- Define the macro "max" with value 2; the Loop below reads it as its iteration count. -->
         <operator activated="true" class="set_macro" compatibility="5.0.10" expanded="true" height="76" name="Set Macro" width="90" x="45" y="255">
           <parameter key="macro" value="max"/>
           <parameter key="value" value="2"/>
         </operator>
         <!-- Run the inner process %{max} times. -->
         <operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="94" name="Loop" width="90" x="179" y="255">
           <parameter key="iterations" value="%{max}"/>
           <parameter key="limit_time" value="false"/>
           <parameter key="timeout" value="1"/>
           <parameter key="parallelize_iteration" value="false"/>
           <process expanded="true" height="391" width="638">
             <!-- Define the macro "current_iteration" from %{a}.
                  NOTE(review): %{a} is presumably RapidMiner's predefined macro for how often
                  the current operator has been applied; confirm against the macro documentation. -->
             <operator activated="true" class="generate_macro" compatibility="5.0.10" expanded="true" height="76" name="Generate Macro" width="90" x="40" y="70">
               <list key="function_descriptions">
                 <parameter key="current_iteration" value="%{a}"/>
               </list>
               <parameter key="use_standard_constants" value="true"/>
             </operator>
             <!-- select_which is driven by %{current_iteration}, so which of the two
                  subprocesses below runs depends on that macro's value. -->
             <operator activated="true" class="select_subprocess" compatibility="5.0.10" expanded="true" height="76" name="Select Subprocess" width="90" x="179" y="75">
               <parameter key="select_which" value="%{current_iteration}"/>
               <parameter key="parallelize_selection_1" value="false"/>
               <parameter key="parallelize_selection_2" value="false"/>
               <!-- First selectable subprocess: "Validation (3)". -->
               <process expanded="true" height="391" width="294">
                 <!-- 10 validations, stratified sampling, local random seed 1992. -->
                 <operator activated="true" class="x_validation" compatibility="5.0.10" expanded="true" height="112" name="Validation (3)" width="90" x="55" y="32">
                   <parameter key="create_complete_model" value="false"/>
                   <parameter key="average_performances_only" value="true"/>
                   <parameter key="leave_one_out" value="false"/>
                   <parameter key="number_of_validations" value="10"/>
                   <parameter key="sampling_type" value="stratified sampling"/>
                   <parameter key="use_local_random_seed" value="true"/>
                   <parameter key="local_random_seed" value="1992"/>
                   <parameter key="parallelize_training" value="false"/>
                   <parameter key="parallelize_testing" value="false"/>
                   <!-- Training subprocess: Neural Net with shuffle switched off. The learner's own
                        use_local_random_seed is false, so its local_random_seed value of 1992 is
                        presumably not used; confirm against the operator documentation. -->
                   <process expanded="true" height="391" width="294">
                     <operator activated="true" class="neural_net" compatibility="5.0.10" expanded="true" height="76" name="Neural Net (3)" width="90" x="56" y="48">
                       <list key="hidden_layers"/>
                       <parameter key="training_cycles" value="500"/>
                       <parameter key="learning_rate" value="0.3"/>
                       <parameter key="momentum" value="0.2"/>
                       <parameter key="decay" value="false"/>
                       <parameter key="shuffle" value="false"/>
                       <parameter key="normalize" value="true"/>
                       <parameter key="error_epsilon" value="1.0E-5"/>
                       <parameter key="use_local_random_seed" value="false"/>
                       <parameter key="local_random_seed" value="1992"/>
                     </operator>
                     <connect from_port="training" to_op="Neural Net (3)" to_port="training set"/>
                     <connect from_op="Neural Net (3)" from_port="model" to_port="model"/>
                     <portSpacing port="source_training" spacing="0"/>
                     <portSpacing port="sink_model" spacing="0"/>
                     <portSpacing port="sink_through 1" spacing="0"/>
                   </process>
                   <!-- Testing subprocess: apply the model to the test set and measure performance. -->
                   <process expanded="true" height="391" width="294">
                     <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (3)" width="90" x="69" y="51">
                       <list key="application_parameters"/>
                       <parameter key="create_view" value="false"/>
                     </operator>
                     <operator activated="true" class="performance" compatibility="5.0.10" expanded="true" height="76" name="Performance (3)" width="90" x="168" y="165">
                       <parameter key="use_example_weights" value="true"/>
                     </operator>
                     <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
                     <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
                     <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
                     <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
                     <portSpacing port="source_model" spacing="0"/>
                     <portSpacing port="source_test set" spacing="0"/>
                     <portSpacing port="source_through 1" spacing="0"/>
                     <portSpacing port="sink_averagable 1" spacing="0"/>
                     <portSpacing port="sink_averagable 2" spacing="0"/>
                   </process>
                 </operator>
                 <connect from_port="input 1" to_op="Validation (3)" to_port="training"/>
                 <connect from_op="Validation (3)" from_port="averagable 1" to_port="output 1"/>
                 <portSpacing port="source_input 1" spacing="0"/>
                 <portSpacing port="source_input 2" spacing="0"/>
                 <portSpacing port="sink_output 1" spacing="0"/>
                 <portSpacing port="sink_output 2" spacing="0"/>
               </process>
               <!-- Second selectable subprocess: "Validation (4)", configured with the same
                    parameter values as "Validation (3)"; only names and layout coordinates differ. -->
               <process expanded="true" height="391" width="294">
                 <operator activated="true" class="x_validation" compatibility="5.0.10" expanded="true" height="112" name="Validation (4)" width="90" x="45" y="75">
                   <parameter key="create_complete_model" value="false"/>
                   <parameter key="average_performances_only" value="true"/>
                   <parameter key="leave_one_out" value="false"/>
                   <parameter key="number_of_validations" value="10"/>
                   <parameter key="sampling_type" value="stratified sampling"/>
                   <parameter key="use_local_random_seed" value="true"/>
                   <parameter key="local_random_seed" value="1992"/>
                   <parameter key="parallelize_training" value="false"/>
                   <parameter key="parallelize_testing" value="false"/>
                   <!-- Training subprocess: Neural Net with shuffle switched off. -->
                   <process expanded="true" height="391" width="294">
                     <operator activated="true" class="neural_net" compatibility="5.0.10" expanded="true" height="76" name="Neural Net (4)" width="90" x="102" y="30">
                       <list key="hidden_layers"/>
                       <parameter key="training_cycles" value="500"/>
                       <parameter key="learning_rate" value="0.3"/>
                       <parameter key="momentum" value="0.2"/>
                       <parameter key="decay" value="false"/>
                       <parameter key="shuffle" value="false"/>
                       <parameter key="normalize" value="true"/>
                       <parameter key="error_epsilon" value="1.0E-5"/>
                       <parameter key="use_local_random_seed" value="false"/>
                       <parameter key="local_random_seed" value="1992"/>
                     </operator>
                     <connect from_port="training" to_op="Neural Net (4)" to_port="training set"/>
                     <connect from_op="Neural Net (4)" from_port="model" to_port="model"/>
                     <portSpacing port="source_training" spacing="0"/>
                     <portSpacing port="sink_model" spacing="0"/>
                     <portSpacing port="sink_through 1" spacing="0"/>
                   </process>
                   <!-- Testing subprocess: apply the model to the test set and measure performance. -->
                   <process expanded="true" height="391" width="294">
                     <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (4)" width="90" x="45" y="30">
                       <list key="application_parameters"/>
                       <parameter key="create_view" value="false"/>
                     </operator>
                     <operator activated="true" class="performance" compatibility="5.0.10" expanded="true" height="76" name="Performance (4)" width="90" x="169" y="30">
                       <parameter key="use_example_weights" value="true"/>
                     </operator>
                     <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
                     <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
                     <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance (4)" to_port="labelled data"/>
                     <connect from_op="Performance (4)" from_port="performance" to_port="averagable 1"/>
                     <portSpacing port="source_model" spacing="0"/>
                     <portSpacing port="source_test set" spacing="0"/>
                     <portSpacing port="source_through 1" spacing="0"/>
                     <portSpacing port="sink_averagable 1" spacing="0"/>
                     <portSpacing port="sink_averagable 2" spacing="0"/>
                   </process>
                 </operator>
                 <connect from_port="input 1" to_op="Validation (4)" to_port="training"/>
                 <connect from_op="Validation (4)" from_port="averagable 1" to_port="output 1"/>
                 <portSpacing port="source_input 1" spacing="0"/>
                 <portSpacing port="source_input 2" spacing="0"/>
                 <portSpacing port="sink_output 1" spacing="0"/>
                 <portSpacing port="sink_output 2" spacing="0"/>
               </process>
             </operator>
             <!-- Inner wiring: loop input -> Generate Macro -> Select Subprocess -> loop output. -->
             <connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
             <connect from_op="Generate Macro" from_port="through 1" to_op="Select Subprocess" to_port="input 1"/>
             <connect from_op="Select Subprocess" from_port="output 1" to_port="output 1"/>
             <portSpacing port="source_input 1" spacing="0"/>
             <portSpacing port="source_input 2" spacing="0"/>
             <portSpacing port="source_input 3" spacing="0"/>
             <portSpacing port="sink_output 1" spacing="0"/>
             <portSpacing port="sink_output 2" spacing="0"/>
           </process>
         </operator>
         <!-- Top-level wiring: the Iris data feeds Loop input 1, Set Macro's through port
              feeds Loop input 2, and the Loop's first output is the process result. -->
         <connect from_op="Retrieve" from_port="output" to_op="Loop" to_port="input 1"/>
         <connect from_op="Set Macro" from_port="through 1" to_op="Loop" to_port="input 2"/>
         <connect from_op="Loop" from_port="output 1" to_port="result 1"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_result 1" spacing="0"/>
         <portSpacing port="sink_result 2" spacing="0"/>
       </process>
     </operator>
    </process>
Sign In or Register to comment.