RapidMiner

Macros with radoop

Macros with radoop

Hi,
Are macros possible within a Radoop Nest?
Cheers
Sven

In fact, I would like to change the following XML so that it can run inside a Radoop Nest:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 6.4.000) built only from standard (non-Radoop) operator classes.
  Flow, as wired by the connect elements below:
    Retrieve, Filter Examples, Loop Values (per value of attribute "patient"), Append,
    Guess Types, Generate Attributes (diffcharttime), Select Attributes (3), Store (2).
-->
<process version="6.4.000">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
   <process expanded="true">
     <!-- Reads the example set from the local repository entry. -->
     <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Retrieve Test20150711(4)" width="90" x="29" y="74">
       <parameter key="repository_entry" value="//Local Repository/data/Test20150711(4)"/>
     </operator>
     <!-- Filter entry "test.equals.211" combined with invert_filter=true;
          presumably this drops the examples where test equals 211 (confirm against the operator documentation). -->
     <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples" width="90" x="179" y="72">
       <parameter key="invert_filter" value="true"/>
       <list key="filters_list">
         <parameter key="filters_entry_key" value="test.equals.211"/>
       </list>
     </operator>
     <!-- Loops over attribute "patient"; the inner process reads the current value via %{loop_value}
          (no iteration macro name is set here, so this is presumably the operator's default macro name). -->
     <operator activated="true" class="loop_values" compatibility="6.4.000" expanded="true" height="76" name="Loop Values" width="90" x="244" y="364">
       <parameter key="attribute" value="patient"/>
       <process expanded="true">
         <!-- Restricts the data to the patient of the current iteration. -->
         <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples (2)" width="90" x="51" y="35">
           <parameter key="parameter_string" value="patient = %{loop_value}"/>
           <parameter key="condition_class" value="attribute_value_filter"/>
           <list key="filters_list"/>
         </operator>
         <!-- Groups by the listed attributes; the list of aggregation attributes is empty. -->
         <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate" width="90" x="209" y="44">
           <list key="aggregation_attributes"/>
           <parameter key="group_by_attributes" value="icustay_expire_flg|patient|value1|charttime|test"/>
         </operator>
         <!-- Sorts by charttime; no sort direction is set, so the operator default applies. -->
         <operator activated="true" class="sort" compatibility="6.4.000" expanded="true" height="76" name="Sort" width="90" x="376" y="30">
           <parameter key="attribute_name" value="charttime"/>
         </operator>
         <!-- Stores the charttime data value at example index 1 of the sorted set in the macro "mincharttime". -->
         <operator activated="true" class="extract_macro" compatibility="6.4.000" expanded="true" height="60" name="Extract Macro" width="90" x="493" y="32">
           <parameter key="macro" value="mincharttime"/>
           <parameter key="macro_type" value="data_value"/>
           <parameter key="attribute_name" value="charttime"/>
           <parameter key="example_index" value="1"/>
           <list key="additional_macros"/>
         </operator>
         <!-- Adds attribute "mincharttime" from the macro of the same name.
              NOTE(review): the input comes from Aggregate's "original" port, not from Extract Macro, so the macro
              being set before this operator runs appears to rely on operator execution order; confirm. -->
         <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="375" y="130">
           <list key="function_descriptions">
             <parameter key="mincharttime" value="%{mincharttime}"/>
           </list>
         </operator>
         <connect from_port="example set" to_op="Filter Examples (2)" to_port="example set input"/>
         <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
         <connect from_op="Aggregate" from_port="example set output" to_op="Sort" to_port="example set input"/>
         <connect from_op="Aggregate" from_port="original" to_op="Generate Attributes (2)" to_port="example set input"/>
         <connect from_op="Sort" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
         <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="out 1"/>
         <portSpacing port="source_example set" spacing="0"/>
         <portSpacing port="sink_out 1" spacing="0"/>
         <portSpacing port="sink_out 2" spacing="0"/>
       </process>
     </operator>
     <!-- Post-processing of the loop output: append, guess types, compute diffcharttime, keep a subset, store. -->
     <operator activated="true" class="append" compatibility="6.4.000" expanded="true" height="76" name="Append" width="90" x="2144" y="124"/>
     <operator activated="true" class="guess_types" compatibility="6.4.000" expanded="true" height="76" name="Guess Types" width="90" x="2290" y="120"/>
     <!-- diffcharttime = (charttime - mincharttime) / 60; the units of charttime are not visible here. -->
     <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes" width="90" x="2415" y="120">
       <list key="function_descriptions">
         <parameter key="diffcharttime" value="(charttime-mincharttime)/60"/>
       </list>
     </operator>
     <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes (3)" width="90" x="2539" y="119">
       <parameter key="attribute_filter_type" value="subset"/>
       <parameter key="attributes" value="patient|diffcharttime|test|value1|icustay_expire_flg"/>
       <parameter key="include_special_attributes" value="true"/>
     </operator>
     <!-- Writes the result to a repository path relative to the process location. -->
     <operator activated="true" class="store" compatibility="6.4.000" expanded="true" height="60" name="Store (2)" width="90" x="2662" y="118">
       <parameter key="repository_entry" value="../data/result"/>
     </operator>
     <connect from_op="Retrieve Test20150711(4)" from_port="output" to_op="Filter Examples" to_port="example set input"/>
     <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Values" to_port="example set"/>
     <connect from_op="Loop Values" from_port="out 1" to_op="Append" to_port="example set 1"/>
     <connect from_op="Append" from_port="merged set" to_op="Guess Types" to_port="example set input"/>
     <connect from_op="Guess Types" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
     <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
     <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Store (2)" to_port="input"/>
     <connect from_op="Store (2)" from_port="through" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>


This is where I landed or crashed:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Attempted port of the per-patient loop into a Radoop Nest (Radoop operator compatibility 2.1.001).
  Everything runs inside the nest: Retrieve (Hive table), Loop (Radoop) with Filter, Aggregate and Sort.
  The review notes below mark inconsistencies that are visible in this XML itself.
-->
<process version="6.4.000">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
   <process expanded="true">
     <operator activated="true" class="radoop:radoop_nest" compatibility="2.1.001" expanded="true" height="76" name="Radoop Nest" width="90" x="23" y="28">
       <parameter key="connection" value="cds.analytics.vancis.nl"/>
       <process expanded="true">
         <!-- Reads the table "test20150711" through the nest's connection. -->
         <operator activated="true" class="radoop:retrieve" compatibility="2.1.001" expanded="true" height="60" name="Retrieve" width="90" x="45" y="38">
           <parameter key="table" value="test20150711"/>
         </operator>
         <!-- The iteration macro is enabled and named "patient".
              NOTE(review): no attribute or value list to loop over is configured here, unlike the
              loop_values operator of the local process; confirm what this loop iterates over. -->
         <operator activated="true" class="radoop:loop" compatibility="2.1.001" expanded="true" height="76" name="Loop (Radoop)" width="90" x="285" y="35">
           <parameter key="set_iteration_macro" value="true"/>
           <parameter key="macro_name" value="patient"/>
           <process expanded="true">
             <!-- NOTE(review): the filter reads %{loop_value}, but the enclosing loop names its macro "patient"
                  and nothing in this process defines "loop_value". -->
             <operator activated="true" class="radoop:example_filter" compatibility="2.1.001" expanded="true" height="76" name="Filter Examples" width="90" x="59" y="41">
               <parameter key="condition_class" value="attribute_value_filter"/>
               <parameter key="parameter_string" value="patient=%{loop_value}"/>
             </operator>
             <!-- NOTE(review): group_by_attributes ends with a trailing "|" (empty last entry). -->
             <operator activated="true" class="radoop:aggregate" compatibility="2.1.001" expanded="true" height="76" name="Aggregate" width="90" x="179" y="43">
               <list key="aggregation_attributes"/>
               <parameter key="group_by_attributes" value="icustay_expire_flg|patient|charttime|test|value1|"/>
             </operator>
             <!-- NOTE(review): %{mincharttime} is referenced, but this process contains no operator that sets it
                  (there is no Extract Macro here). The output port of this operator is also not connected. -->
             <operator activated="true" class="radoop:attribute_generate" compatibility="2.1.001" expanded="true" height="76" name="Generate Attributes" width="90" x="318" y="167">
               <parameter key="attribute name" value="mincharttime"/>
               <parameter key="attribute expression" value="%{mincharttime}"/>
               <list key="new_attributes"/>
             </operator>
             <!-- Sorts by charttime; its output is what the loop returns on "output 1". -->
             <operator activated="true" class="radoop:sort" compatibility="2.1.001" expanded="true" height="76" name="Sort" width="90" x="319" y="39">
               <parameter key="sort attribute" value="charttime"/>
               <list key="additional attributes"/>
             </operator>
             <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
             <connect from_op="Filter Examples" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
             <connect from_op="Aggregate" from_port="example set output" to_op="Sort" to_port="example set input"/>
             <connect from_op="Aggregate" from_port="original" to_op="Generate Attributes" to_port="example set input"/>
             <connect from_op="Sort" from_port="example set output" to_port="output 1"/>
             <portSpacing port="source_input 1" spacing="0"/>
             <portSpacing port="source_input 2" spacing="0"/>
             <portSpacing port="sink_output 1" spacing="0"/>
             <portSpacing port="sink_output 2" spacing="0"/>
           </process>
         </operator>
         <connect from_op="Retrieve" from_port="output" to_op="Loop (Radoop)" to_port="input 1"/>
         <connect from_op="Loop (Radoop)" from_port="output 1" to_port="output 1"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_output 1" spacing="0"/>
         <portSpacing port="sink_output 2" spacing="0"/>
       </process>
     </operator>
     <!-- The nest's first output is delivered as the process result; the later steps of the local
          process (append, diffcharttime, select, store) are not part of this attempt. -->
     <connect from_op="Radoop Nest" from_port="output 1" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>


Any suggestions?
Cheers
Sven
3 REPLIES

Re: Macros with radoop

Hi Sven,

Inside the Nest, the use of macros is limited. This is because the complete Hive statement is created before the process enters the nest. So in your specific case you would need to move the loop out of the nest instead of looping inside it.

Cheers,
~Marius

Re: Macros with radoop

Hi Marius,
Thanks for the reply, BUT the reason I used the Radoop Nest is the 40 GB available there vs. the 8 GB on my laptop. It is quite frustrating: executing the process on my local machine always stops because of a memory shortage, even with preprocessing.
Sven

Re: Macros with radoop

Sven, what about something like the process below?
Of course, you need to replace the standard operators with their Radoop counterparts in the Nests.

~Marius

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Suggested layout: keep the loop over "patient" outside of Radoop and use separate Radoop Nests
  (compatibility 2.3.001) before, inside and after the loop.
  This is a sketch: most operators inside the nests are still standard operator classes and the new
  branch is only partly wired (see the notes below). The original local chain
  (Filter Examples, Loop Values, Append, Guess Types, Generate Attributes, Select Attributes (3), Store (2))
  is kept alongside and remains fully connected.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Retrieve Test20150711(4)" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Local Repository/data/Test20150711(4)"/>
      </operator>
      <!-- Nest 1: retrieve plus the "test.equals.211" filter; its output feeds Loop Values (2).
           No connection parameter is set on any nest in this sketch. -->
      <operator activated="true" class="radoop:radoop_nest" compatibility="2.3.001" expanded="true" height="76" name="Radoop Nest" width="90" x="45" y="210">
        <enumeration key="tables_to_reload"/>
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Retrieve Test20150711(4) (2)" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//Local Repository/data/Test20150711(4)"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples (3)" width="90" x="246" y="30">
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="test.equals.211"/>
            </list>
          </operator>
          <connect from_op="Retrieve Test20150711(4) (2)" from_port="output" to_op="Filter Examples (3)" to_port="example set input"/>
          <connect from_op="Filter Examples (3)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Loop over "patient" outside of any nest; each iteration runs Radoop Nest (2).
           The loop's inner process has no connect elements: the nest re-reads its data with its own
           Retrieve instead of taking the loop input, and nothing is passed to "out 1". -->
      <operator activated="true" class="loop_values" compatibility="6.4.000" expanded="true" height="60" name="Loop Values (2)" width="90" x="380" y="210">
        <parameter key="attribute" value="patient"/>
        <process expanded="true">
          <operator activated="true" class="radoop:radoop_nest" compatibility="2.3.001" expanded="true" height="60" name="Radoop Nest (2)" width="90" x="246" y="30">
            <enumeration key="tables_to_reload"/>
            <process expanded="true">
              <!-- Sort (2) and Extract Macro (2) are deactivated and unconnected; Aggregate (3) below is
                   meant to take over their role. -->
              <operator activated="false" class="sort" compatibility="6.4.000" expanded="true" height="76" name="Sort (2)" width="90" x="447" y="30">
                <parameter key="attribute_name" value="charttime"/>
              </operator>
              <operator activated="false" class="extract_macro" compatibility="6.4.000" expanded="true" height="60" name="Extract Macro (2)" width="90" x="581" y="30">
                <parameter key="macro" value="mincharttime"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="charttime"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Retrieve Test20150711(4) (3)" width="90" x="45" y="210">
                <parameter key="repository_entry" value="//Local Repository/data/Test20150711(4)"/>
              </operator>
              <!-- %{loop_value} is used inside the nest here, set by the enclosing Loop Values (2). -->
              <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples (4)" width="90" x="179" y="210">
                <parameter key="parameter_string" value="patient = %{loop_value}"/>
                <parameter key="condition_class" value="attribute_value_filter"/>
                <list key="filters_list"/>
              </operator>
              <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate (2)" width="90" x="337" y="219">
                <list key="aggregation_attributes"/>
                <parameter key="group_by_attributes" value="icustay_expire_flg|patient|value1|charttime|test"/>
              </operator>
              <!-- NOTE(review): this aggregates an attribute named "date" with "maximum", while the replaced
                   operators work on "charttime" and the target is called "mincharttime"; presumably this
                   should be the minimum of charttime. Confirm before use. -->
              <operator activated="true" class="radoop:aggregate" compatibility="2.3.001" expanded="true" height="76" name="Aggregate (3)" width="90" x="514" y="120">
                <list key="aggregation_attributes">
                  <parameter key="date" value="maximum"/>
                </list>
                <description align="center" color="transparent" colored="false" width="126">replace sort+extract by aggregation</description>
              </operator>
              <!-- NOTE(review): still reads %{mincharttime}, but the only operator that sets this macro in
                   this nest (Extract Macro (2)) is deactivated. -->
              <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes (3)" width="90" x="503" y="305">
                <list key="function_descriptions">
                  <parameter key="mincharttime" value="%{mincharttime}"/>
                </list>
              </operator>
              <!-- Join without key attributes, combining the aggregate (left) with the per-patient rows (right). -->
              <operator activated="true" class="radoop:join" compatibility="2.3.001" expanded="true" height="76" name="Join" width="90" x="648" y="255">
                <list key="key_attributes"/>
                <description align="center" color="transparent" colored="false" width="126">should be a cartesian product if join key is not selected</description>
              </operator>
              <!-- Appends to table "tableX", the same table name that Radoop Nest (3) retrieves later.
                   The output of this operator is not connected to the nest's output port. -->
              <operator activated="true" class="radoop:append" compatibility="2.3.001" expanded="true" height="76" name="Append (2)" width="90" x="782" y="255">
                <parameter key="tablename" value="tableX"/>
              </operator>
              <connect from_op="Retrieve Test20150711(4) (3)" from_port="output" to_op="Filter Examples (4)" to_port="example set input"/>
              <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
              <connect from_op="Aggregate (2)" from_port="example set output" to_op="Aggregate (3)" to_port="example set input"/>
              <connect from_op="Aggregate (2)" from_port="original" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Aggregate (3)" from_port="example set output" to_op="Join" to_port="left"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Append (2)" to_port="example set input"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
        </process>
      </operator>
      <!-- From here on: the original local chain, kept unchanged next to the new branch. -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples" width="90" x="246" y="30">
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="test.equals.211"/>
        </list>
      </operator>
      <operator activated="true" class="loop_values" compatibility="6.4.000" expanded="true" height="76" name="Loop Values" width="90" x="380" y="30">
        <parameter key="attribute" value="patient"/>
        <process expanded="true">
          <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples (2)" width="90" x="51" y="35">
            <parameter key="parameter_string" value="patient = %{loop_value}"/>
            <parameter key="condition_class" value="attribute_value_filter"/>
            <list key="filters_list"/>
          </operator>
          <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate" width="90" x="209" y="44">
            <list key="aggregation_attributes"/>
            <parameter key="group_by_attributes" value="icustay_expire_flg|patient|value1|charttime|test"/>
          </operator>
          <operator activated="true" class="sort" compatibility="6.4.000" expanded="true" height="76" name="Sort" width="90" x="376" y="30">
            <parameter key="attribute_name" value="charttime"/>
          </operator>
          <!-- Stores the charttime data value at example index 1 of the sorted set in the macro "mincharttime". -->
          <operator activated="true" class="extract_macro" compatibility="6.4.000" expanded="true" height="60" name="Extract Macro" width="90" x="493" y="32">
            <parameter key="macro" value="mincharttime"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="charttime"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="375" y="130">
            <list key="function_descriptions">
              <parameter key="mincharttime" value="%{mincharttime}"/>
            </list>
          </operator>
          <connect from_port="example set" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="append" compatibility="6.4.000" expanded="true" height="76" name="Append" width="90" x="514" y="30"/>
      <!-- Nest 3: reads "tableX" (the table Append (2) writes inside the loop), computes diffcharttime,
           selects the output attributes and stores the result.
           No top-level connect element references this nest, so its execution order relative to
           Loop Values (2) is not fixed by the wiring. -->
      <operator activated="true" class="radoop:radoop_nest" compatibility="2.3.001" expanded="true" height="76" name="Radoop Nest (3)" width="90" x="648" y="210">
        <enumeration key="tables_to_reload"/>
        <process expanded="true">
          <operator activated="true" class="radoop:retrieve" compatibility="2.3.001" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="table" value="tableX"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes (4)" width="90" x="179" y="30">
            <list key="function_descriptions">
              <parameter key="diffcharttime" value="(charttime-mincharttime)/60"/>
            </list>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes (2)" width="90" x="313" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="patient|diffcharttime|test|value1|icustay_expire_flg"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Same repository entry as Store (2) in the local chain below. -->
          <operator activated="true" class="store" compatibility="6.4.000" expanded="true" height="60" name="Store (3)" width="90" x="447" y="30">
            <parameter key="repository_entry" value="../data/result"/>
          </operator>
          <connect from_op="Retrieve" from_port="output" to_op="Generate Attributes (4)" to_port="example set input"/>
          <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Store (3)" to_port="input"/>
          <connect from_op="Store (3)" from_port="through" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="guess_types" compatibility="6.4.000" expanded="true" height="76" name="Guess Types" width="90" x="648" y="30"/>
      <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Generate Attributes" width="90" x="782" y="30">
        <list key="function_descriptions">
          <parameter key="diffcharttime" value="(charttime-mincharttime)/60"/>
        </list>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes (3)" width="90" x="916" y="30">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="patient|diffcharttime|test|value1|icustay_expire_flg"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="store" compatibility="6.4.000" expanded="true" height="60" name="Store (2)" width="90" x="1050" y="30">
        <parameter key="repository_entry" value="../data/result"/>
      </operator>
      <!-- Top-level wiring: only Radoop Nest to Loop Values (2) belongs to the new branch; every other
           connection is the original local chain ending in "result 1". -->
      <connect from_op="Retrieve Test20150711(4)" from_port="output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Radoop Nest" from_port="output 1" to_op="Loop Values (2)" to_port="example set"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Guess Types" to_port="example set input"/>
      <connect from_op="Guess Types" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Store (2)" to_port="input"/>
      <connect from_op="Store (2)" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>