Options

"Loop over all combination of 2 attributes"

MuehliManMuehliMan Member Posts: 85 Maven
edited June 2019 in Help
Hello again,

I am again asking to community for help, as my wisdom has come to and end here. Here is a short descriptoion of what the code should do:
read input --> preprocessing + data preparation --> loop over combination of 2 of attributes --> Build and evaluate Decision tree for all combinations --> Write Attributes and Feature names to log and then to csv
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
    <process expanded="true" height="566" width="1619">
      <operator activated="true" class="read_csv" compatibility="5.0.8" expanded="true" height="60" name="Read CSV" width="90" x="45" y="120">
        <parameter key="file_name" value="E:\binary_preprocessed.csv"/>
        <parameter key="comment_characters" value="*"/>
        <parameter key="column_separators" value=","/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role" width="90" x="179" y="120">
        <parameter key="name" value="ID"/>
        <parameter key="target_role" value="id"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role (2)" width="90" x="313" y="120">
        <parameter key="name" value="activity"/>
        <parameter key="target_role" value="label"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.0.8" expanded="true" height="76" name="only numeric" width="90" x="447" y="120">
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="regular_expression" value="pKa1ACD10|pKa2ACD10"/>
        <parameter key="value_type" value="numeric"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples" width="90" x="581" y="120">
        <parameter key="condition_class" value="no_missing_attributes"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples (2)" width="90" x="715" y="120">
        <parameter key="condition_class" value="no_missing_labels"/>
      </operator>
      <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="849" y="120">
        <parameter key="use_exact_number" value="true"/>
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="max_number_of_attributes" value="5"/>
        <process expanded="true" height="665" width="1094">
          <operator activated="true" class="extract_macro" compatibility="5.0.8" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
            <parameter key="macro" value="atts"/>
            <parameter key="macro_type" value="number_of_attributes"/>
          </operator>
          <operator activated="true" class="generate_macro" compatibility="5.0.8" expanded="true" height="76" name="treedepth" width="90" x="179" y="30">
            <list key="function_descriptions">
              <parameter key="treedepth" value="2 * %{atts} + 1"/>
            </list>
          </operator>
          <operator activated="true" class="x_validation" compatibility="5.0.8" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="10"/>
            <process expanded="true" height="647" width="424">
              <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30">
                <parameter key="criterion" value="information_gain"/>
              </operator>
              <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true" height="647" width="424">
              <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
                <parameter key="create_view" value="true"/>
              </operator>
              <operator activated="true" class="performance_binominal_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
                <parameter key="main_criterion" value="youden"/>
                <parameter key="youden" value="true"/>
                <parameter key="psep" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="76" name="Log" width="90" x="447" y="30">
            <parameter key="filename" value="fs_2_atts.log"/>
            <list key="log">
              <parameter key="youden" value="operator.Validation.value.performance"/>
              <parameter key="psep" value="operator.Validation.value.performance2"/>
              <parameter key="accuracy" value="operator.Validation.value.performance3"/>
              <parameter key="feature_names" value="operator.Loop Subsets.value.feature_names"/>
              <parameter key="feature_number" value="operator.Loop Subsets.value.feature_number"/>
              <parameter key="deviation" value="operator.Validation.value.deviation"/>
            </list>
            <parameter key="sorting_type" value="top-k"/>
            <parameter key="sorting_dimension" value="youden"/>
          </operator>
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="treedepth" to_port="through 1"/>
          <connect from_op="treedepth" from_port="through 1" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data (2)" width="90" x="983" y="120"/>
      <operator activated="true" class="write_csv" compatibility="5.0.8" expanded="true" height="60" name="Write CSV" width="90" x="1117" y="120">
        <parameter key="csv_file" value="%{path}\%{set}_%{subset}_fs_2_atts.csv"/>
      </operator>
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="only numeric" to_port="example set input"/>
      <connect from_op="only numeric" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Log to Data (2)" to_port="through 1"/>
      <connect from_op="Log to Data (2)" from_port="exampleSet" to_op="Write CSV" to_port="input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
But unfortunately is is stopping after some time, crashing the whole programm. I dont know if i make an error in der workflow or maybe it is using too much memory. Well, maybe some of you guys can give mie a tips.

Answers

  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    I just want to add, that it freezes the Process timer, for very long times. So maybe it is just using so much memory, that there is rarley any left for the programm itself. Is there anyoption to use only 90% of the available memory to keep something for the programm itself?

    Cheers,
    Markus
  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn
    Hi Markus,
    this is not possible, otherwise we would have done so long time ago :) I think you will need either more memory or test if there's some problem with the tree itself.
    By the way: You are generating a tree_depth macro, but do not use it for the  tree construction...

    Greetings,
      Sebastian
  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    Hey Sebastian,

    you are right. Treedepth was is not needed, as it is always 2 attributes. I think it is a bad idea to write/store the log at every iteration right? Would free memory help as it deletes all models and views?
    How do I get the log updated within the loop and written only after the iteration is finished?
    Exception in thread "AWT-EventQueue-0" java.lang.OutOfMemoryError: GC overhead limit exceeded
    Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException
    Overall this message does not sound to good. This message appears not from the beginning, but after some time, most likely when the Process timer completely freezes.

    Cheers,
    Markus
  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn
    Hi Markus,
    indeed this message does not sound good. But as far as I saw, your attributes are numerical, aren't they? That's important, because the tree can use numerical attributes for multiple splits. Thus the depth can be in worst case equal to number of examples and this would definitively result in an out of memory exception...

    The free memory operator will not free anything that would not have been freed before java throws an out of memory exception.

    Greetings,
      Sebastian
  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    Dear Sebastian,

    Yes my attributes are numerical. To avoid a tree putting just one example into one split, I intent to use the minumum exaples per split feature and I set the maximum treedepth lower.
    I tried to modifiy the workflow by writing the log not to the hard drive but to the memory, but it did not change much. Here is my version I am working with  at the moment.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.0">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
        <parameter key="logverbosity" value="3"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="1"/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="parallelize_main_process" value="false"/>
        <process expanded="true" height="566" width="1619">
          <operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro" width="90" x="45" y="30">
            <parameter key="macro" value="path"/>
            <parameter key="value" value="C:\Daten"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (2)" width="90" x="179" y="30">
            <parameter key="macro" value="set"/>
            <parameter key="value" value="ASM"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (3)" width="90" x="313" y="30">
            <parameter key="macro" value="subset"/>
            <parameter key="value" value="full"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (4)" width="90" x="447" y="30">
            <parameter key="macro" value="subset"/>
            <parameter key="value" value="full"/>
          </operator>
          <operator activated="true" class="read_csv" compatibility="5.0.8" expanded="true" height="60" name="Read CSV" width="90" x="45" y="120">
            <parameter key="file_name" value="%{path}\input_preprocessed.csv"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="skip_comments" value="true"/>
            <parameter key="comment_characters" value="*"/>
            <parameter key="use_first_row_as_attribute_names" value="true"/>
            <parameter key="use_quotes" value="true"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="column_separators" value=","/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="date_format" value="yyyy-MM-dd"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role" width="90" x="179" y="120">
            <parameter key="name" value="CID"/>
            <parameter key="target_role" value="id"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role (2)" width="90" x="313" y="120">
            <parameter key="name" value="activity"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="5.0.8" expanded="true" height="76" name="only numeric" width="90" x="447" y="120">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="attribute" value=""/>
            <parameter key="regular_expression" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="11"/>
            <parameter key="block_type" value="0"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="8"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples" width="90" x="581" y="120">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples (2)" width="90" x="715" y="120">
            <parameter key="condition_class" value="no_missing_labels"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="849" y="120">
            <parameter key="use_exact_number" value="true"/>
            <parameter key="exact_number_of_attributes" value="2"/>
            <parameter key="min_number_of_attributes" value="1"/>
            <parameter key="limit_max_number" value="false"/>
            <parameter key="max_number_of_attributes" value="5"/>
            <parameter key="parallelize_subprocess" value="false"/>
            <process expanded="true" height="665" width="1094">
              <operator activated="true" class="extract_macro" compatibility="5.0.8" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
                <parameter key="macro" value="atts"/>
                <parameter key="macro_type" value="number_of_attributes"/>
                <parameter key="statistics" value="0"/>
                <parameter key="attribute_name" value=""/>
              </operator>
              <operator activated="false" class="generate_macro" compatibility="5.0.8" expanded="true" height="60" name="treedepth" width="90" x="179" y="30">
                <list key="function_descriptions">
                  <parameter key="treedepth" value="2 * %{atts} + 1"/>
                </list>
                <parameter key="use_standard_constants" value="true"/>
              </operator>
              <operator activated="true" class="x_validation" compatibility="5.0.8" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
                <parameter key="create_complete_model" value="false"/>
                <parameter key="average_performances_only" value="true"/>
                <parameter key="leave_one_out" value="false"/>
                <parameter key="number_of_validations" value="10"/>
                <parameter key="sampling_type" value="2"/>
                <parameter key="use_local_random_seed" value="true"/>
                <parameter key="local_random_seed" value="10"/>
                <parameter key="parallelize_training" value="false"/>
                <parameter key="parallelize_testing" value="false"/>
                <process expanded="true" height="647" width="424">
                  <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30">
                    <parameter key="criterion" value="information_gain"/>
                    <parameter key="minimal_size_for_split" value="1"/>
                    <parameter key="minimal_leaf_size" value="1"/>
                    <parameter key="minimal_gain" value="0.05"/>
                    <parameter key="maximal_depth" value="%{treedepth}"/>
                    <parameter key="confidence" value="0.25"/>
                    <parameter key="number_of_prepruning_alternatives" value="3"/>
                    <parameter key="no_pre_pruning" value="false"/>
                    <parameter key="no_pruning" value="false"/>
                  </operator>
                  <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
                  <connect from_op="Decision Tree" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true" height="647" width="424">
                  <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                    <parameter key="create_view" value="true"/>
                  </operator>
                  <operator activated="true" class="performance_binominal_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
                    <parameter key="main_criterion" value="youden"/>
                    <parameter key="accuracy" value="true"/>
                    <parameter key="classification_error" value="false"/>
                    <parameter key="kappa" value="false"/>
                    <parameter key="AUC (optimistic)" value="false"/>
                    <parameter key="AUC" value="false"/>
                    <parameter key="AUC (pessimistic)" value="false"/>
                    <parameter key="precision" value="false"/>
                    <parameter key="recall" value="false"/>
                    <parameter key="lift" value="false"/>
                    <parameter key="fallout" value="false"/>
                    <parameter key="f_measure" value="false"/>
                    <parameter key="false_positive" value="false"/>
                    <parameter key="false_negative" value="false"/>
                    <parameter key="true_positive" value="false"/>
                    <parameter key="true_negative" value="false"/>
                    <parameter key="sensitivity" value="false"/>
                    <parameter key="specificity" value="false"/>
                    <parameter key="youden" value="true"/>
                    <parameter key="positive_predictive_value" value="false"/>
                    <parameter key="negative_predictive_value" value="false"/>
                    <parameter key="psep" value="true"/>
                    <parameter key="skip_undefined_labels" value="false"/>
                    <parameter key="use_example_weights" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="76" name="Log" width="90" x="447" y="30">
                <parameter key="filename" value="%{path}\%{set}_%{subset}_fs_2_atts.log"/>
                <list key="log">
                  <parameter key="youden" value="operator.Validation.value.performance"/>
                  <parameter key="psep" value="operator.Validation.value.performance2"/>
                  <parameter key="accuracy" value="operator.Validation.value.performance3"/>
                  <parameter key="feature_names" value="operator.Loop Subsets.value.feature_names"/>
                  <parameter key="feature_number" value="operator.Loop Subsets.value.feature_number"/>
                  <parameter key="deviation" value="operator.Validation.value.deviation"/>
                </list>
                <parameter key="sorting_type" value="top-k"/>
                <parameter key="sorting_dimension" value="youden"/>
                <parameter key="sorting_k" value="100"/>
                <parameter key="persistent" value="false"/>
              </operator>
              <operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data (2)" width="90" x="581" y="30"/>
              <operator activated="true" class="remember" compatibility="5.0.8" expanded="true" height="60" name="Remember" width="90" x="715" y="30">
                <parameter key="name" value="stack"/>
                <parameter key="io_object" value="ExampleSet"/>
                <parameter key="store_which" value="1"/>
                <parameter key="remove_from_process" value="true"/>
              </operator>
              <operator activated="true" class="free_memory" compatibility="5.0.8" expanded="true" height="76" name="Free Memory" width="90" x="849" y="30"/>
              <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_op="Log to Data (2)" to_port="through 1"/>
              <connect from_op="Log to Data (2)" from_port="exampleSet" to_op="Remember" to_port="store"/>
              <connect from_op="Remember" from_port="stored" to_op="Free Memory" to_port="through 1"/>
              <portSpacing port="source_example set" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="recall" compatibility="5.0.8" expanded="true" height="60" name="Recall" width="90" x="45" y="345">
            <parameter key="name" value="stack"/>
            <parameter key="io_object" value="ExampleSet"/>
            <parameter key="remove_from_store" value="true"/>
          </operator>
          <operator activated="true" class="write_excel" compatibility="5.0.8" expanded="true" height="60" name="Write Excel" width="90" x="179" y="345">
            <parameter key="excel_file" value="%{Path}/fs_2_atts.xls"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
          <connect from_op="Set Macro" from_port="through 1" to_op="Set Macro (2)" to_port="through 1"/>
          <connect from_op="Set Macro (2)" from_port="through 1" to_op="Set Macro (3)" to_port="through 1"/>
          <connect from_op="Set Macro (3)" from_port="through 1" to_op="Set Macro (4)" to_port="through 1"/>
          <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="only numeric" to_port="example set input"/>
          <connect from_op="only numeric" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
          <connect from_op="Recall" from_port="result" to_op="Write Excel" to_port="input"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>

    After some runtime the log says for every iteration: 
    Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException
            at javax.swing.text.BoxView.updateLayoutArray(BoxView.java:196)
            at javax.swing.text.BoxView.replace(BoxView.java:168)
            at javax.swing.text.View.updateChildren(View.java:1095)
            at javax.swing.text.View.insertUpdate(View.java:679)
            at javax.swing.plaf.basic.BasicTextUI$RootView.insertUpdate(BasicTextUI.java:1590)
            at javax.swing.plaf.basic.BasicTextUI$UpdateHandler.insertUpdate(BasicTextUI.java:1849)
            at javax.swing.text.AbstractDocument.fireInsertUpdate(AbstractDocument.java:185)
            at javax.swing.text.AbstractDocument.handleInsertString(AbstractDocument.java:734)
            at javax.swing.text.AbstractDocument.insertString(AbstractDocument.java:693)
            at com.rapidminer.gui.tools.LoggingViewer.append(LoggingViewer.java:300)
            at com.rapidminer.gui.tools.LoggingViewer.access$000(LoggingViewer.java:83)
            at com.rapidminer.gui.tools.LoggingViewer$2$1.run(LoggingViewer.java:186)
            at java.awt.event.InvocationEvent.dispatch(InvocationEvent.java:209)
            at java.awt.EventQueue.dispatchEvent(EventQueue.java:597)
            at java.awt.EventDispatchThread.pumpOneEventForFilters(EventDispatchThread.java:269)
            at java.awt.EventDispatchThread.pumpEventsForFilter(EventDispatchThread.java:184)
            at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:174)
            at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:169)
            at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:161)
            at java.awt.EventDispatchThread.run(EventDispatchThread.java:122)
    I also tried to run it with a lower number of attributes (like 100 random by "Generate data") and it seems to work there. If someone experienced could help me please, I just dont know here the mistake is.

    Best regards,
    Markus
  • Options
    haddockhaddock Member Posts: 849 Maven
    Hi Markus,

    I think your code choked the cat because it kept storing extra copies of the log in memory. Here is code that selects all attribute pairs and logs their classification performance, hope that is what you had in mind.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.0">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="">
        <process expanded="true" height="365" width="748">
          <operator activated="true" class="retrieve" compatibility="5.0.8" expanded="true" height="60" name="Retrieve" width="90" x="179" y="75">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="380" y="75">
            <parameter key="use_exact_number" value="true"/>
            <parameter key="exact_number_of_attributes" value="2"/>
            <process expanded="true" height="380" width="815">
              <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30"/>
              <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
                <list key="class_weights"/>
              </operator>
              <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="94" name="Log" width="90" x="581" y="30">
                <list key="log">
                  <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
                  <parameter key="Performance" value="operator.Performance.value.accuracy"/>
                </list>
              </operator>
              <connect from_port="example set" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Decision Tree" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
              <connect from_op="Performance" from_port="example set" to_op="Log" to_port="through 2"/>
              <portSpacing port="source_example set" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Retrieve" from_port="output" to_op="Loop Subsets" to_port="example set"/>
          <connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    Thank you for your workflow!

    So using a cross-validation (X-Val) I would connect Performance Evaluation with the average output of the validation, and then the output of the X-Validation with the log operator, right?

    But how do I get the log written to a file (as data or even better example file) without this being done every single step?

    I agree, that writing a copy of the log file every iteration is killing the process. But i would still need the result of this search saved somewhere.

    Cheers,
    Markus
  • Options
    haddockhaddock Member Posts: 849 Maven
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.0">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="">
        <process expanded="true" height="365" width="748">
          <operator activated="true" class="retrieve" compatibility="5.0.8" expanded="true" height="60" name="Retrieve" width="90" x="179" y="75">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="380" y="75">
            <parameter key="use_exact_number" value="true"/>
            <parameter key="exact_number_of_attributes" value="2"/>
            <process expanded="true" height="380" width="815">
              <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30"/>
              <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
                <list key="class_weights"/>
              </operator>
              <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="94" name="Log" width="90" x="581" y="30">
                <list key="log">
                  <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
                  <parameter key="Performance" value="operator.Performance.value.accuracy"/>
                </list>
              </operator>
              <connect from_port="example set" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Decision Tree" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
              <connect from_op="Performance" from_port="example set" to_op="Log" to_port="through 2"/>
              <portSpacing port="source_example set" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data" width="90" x="571" y="73"/>
          <connect from_op="Retrieve" from_port="output" to_op="Loop Subsets" to_port="example set"/>
          <connect from_op="Loop Subsets" from_port="example set" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.