Options

What is the cause of the hex.gram.Gram$NonSPDMatrixException?

anaRodrigues · Member · Posts: 33 · Contributor II
Hello,

I've checked the input example set and everything looks fine. Here's the error: hex.gram.Gram$NonSPDMatrixException

It only seems to happen with the logistic regression algorithm.

Here is my process:
<?xml version="1.0" encoding="UTF-8"?><process version="9.9.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.9.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Read train (2): loads the training data from a comma-separated text file.
           The first row supplies the attribute names, quoting with the double-quote character is enabled,
           numbers are parsed with "." as the decimal character, and values that do not match the expected
           type are read as missing values.
           NOTE(review): csv_file is "C:/Users/ASUS/Documents", which carries no file name or extension. It looks
           like a folder (possibly a shortened or redacted path); confirm it points at the actual CSV file. -->
      <operator activated="true" class="read_csv" compatibility="9.9.000" expanded="true" height="68" name="Read train (2)" width="90" x="45" y="34">
        <parameter key="csv_file" value="C:/Users/ASUS/Documents"/>
        <parameter key="column_separators" value=","/>
        <parameter key="trim_lines" value="false"/>
        <parameter key="use_quotes" value="true"/>
        <parameter key="quotes_character" value="&quot;"/>
        <parameter key="escape_character" value="\"/>
        <parameter key="skip_comments" value="false"/>
        <parameter key="comment_characters" value="#"/>
        <parameter key="starting_row" value="1"/>
        <parameter key="parse_numbers" value="true"/>
        <parameter key="decimal_character" value="."/>
        <parameter key="grouped_digits" value="false"/>
        <parameter key="grouping_character" value=","/>
        <parameter key="infinity_representation" value=""/>
        <parameter key="date_format" value=""/>
        <parameter key="first_row_as_names" value="true"/>
        <list key="annotations"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information"/>
        <parameter key="read_not_matching_values_as_missings" value="true"/>
      </operator>
      <!-- Remove missing data: filters the example set with the condition class "no_missing_attributes"
           (filter not inverted, no custom filter entries). Presumably this keeps only the rows in which
           no attribute value is missing; confirm against the Filter Examples documentation. -->
      <operator activated="true" class="filter_examples" compatibility="9.9.000" expanded="true" height="103" name="Remove missing data" width="90" x="179" y="34">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="no_missing_attributes"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list"/>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <!-- Set Role (2): marks the attribute "ID" with the role "id" and the attribute "Target" with the
           role "label".
           NOTE(review): "ID" is assigned twice (once through attribute_name/target_role and once in the
           additional roles list) with the same role, so the list entry for "ID" looks redundant. -->
      <operator activated="true" class="set_role" compatibility="9.9.000" expanded="true" height="82" name="Set Role (2)" width="90" x="313" y="34">
        <parameter key="attribute_name" value="ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles">
          <parameter key="ID" value="id"/>
          <parameter key="Target" value="label"/>
        </list>
      </operator>
      <operator activated="true" class="multiply" compatibility="9.9.000" expanded="true" height="103" name="Multiply (2)" width="90" x="447" y="34"/>
      <!-- Sample (3): absolute sampling with balance_data enabled; the per-class list requests 51 examples
           for the class "False" and 51 for the class "True".
           NOTE(review): sample_size is 100 while the per-class sizes add up to 102; presumably the per-class
           list is the one in effect when balance_data is true. Confirm which value is used.
           No local random seed is used, so the drawn sample presumably depends on the process-level seed. -->
      <operator activated="true" class="sample" compatibility="9.9.000" expanded="true" height="82" name="Sample (3)" width="90" x="581" y="34">
        <parameter key="sample" value="absolute"/>
        <parameter key="balance_data" value="true"/>
        <parameter key="sample_size" value="100"/>
        <parameter key="sample_ratio" value="0.1"/>
        <parameter key="sample_probability" value="0.1"/>
        <list key="sample_size_per_class">
          <parameter key="False" value="51"/>
          <parameter key="True" value="51"/>
        </list>
        <list key="sample_ratio_per_class"/>
        <list key="sample_probability_per_class"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="9.9.000" expanded="true" height="145" name="with Downsampling" width="90" x="715" y="34">
        <list key="parameters">
          <parameter key="Remove Correlated Attributes.correlation" value="[0.4;1.0;6;linear]"/>
          <parameter key="MRMR-FS.k" value="[10;24;7;linear]"/>
        </list>
        <parameter key="error_handling" value="fail on error"/>
        <parameter key="log_performance" value="true"/>
        <parameter key="log_all_criteria" value="false"/>
        <parameter key="synchronize" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="Cross Validation (2)" width="90" x="45" y="34">
            <parameter key="split_on_batch_attribute" value="false"/>
            <parameter key="leave_one_out" value="false"/>
            <parameter key="number_of_folds" value="4"/>
            <parameter key="sampling_type" value="automatic"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Remove outliers (2)" width="90" x="45" y="34">
                <process expanded="true">
                  <operator activated="true" class="multiply" compatibility="9.9.000" expanded="true" height="103" name="Multiply (3)" width="90" x="45" y="34"/>
                  <operator activated="true" class="normalize" compatibility="9.9.000" expanded="true" height="103" name="Normalize (2)" width="90" x="112" y="187">
                    <parameter key="return_preprocessing_model" value="false"/>
                    <parameter key="create_view" value="false"/>
                    <parameter key="attribute_filter_type" value="all"/>
                    <parameter key="attribute" value=""/>
                    <parameter key="attributes" value=""/>
                    <parameter key="use_except_expression" value="false"/>
                    <parameter key="value_type" value="numeric"/>
                    <parameter key="use_value_type_exception" value="false"/>
                    <parameter key="except_value_type" value="real"/>
                    <parameter key="block_type" value="value_series"/>
                    <parameter key="use_block_type_exception" value="false"/>
                    <parameter key="except_block_type" value="value_series_end"/>
                    <parameter key="invert_selection" value="false"/>
                    <parameter key="include_special_attributes" value="false"/>
                    <parameter key="method" value="Z-transformation"/>
                    <parameter key="min" value="0.0"/>
                    <parameter key="max" value="1.0"/>
                    <parameter key="allow_negative_values" value="false"/>
                  </operator>
                  <!-- Detect Outlier (LOF): scores the normalized copy of the training data (it is fed by
                       Normalize (2)) using MinPts bounds 10 and 20 and the euclidean distance.
                       The Python script downstream expects an "outlier" column in this operator's output. -->
                  <operator activated="true" class="detect_outlier_lof" compatibility="9.9.000" expanded="true" height="82" name="Detect Outlier (LOF)" width="90" x="246" y="187">
                    <parameter key="minimal_points_lower_bound" value="10"/>
                    <parameter key="minimal_points_upper_bound" value="20"/>
                    <parameter key="distance_function" value="euclidian distance"/>
                  </operator>
                  <!-- Execute Python (3): removes the outliers from the ORIGINAL (non-normalized) data.
                       rm_main(ori, norm) receives the untouched copy on input 1 (from Multiply (3)) and the
                       normalized, LOF-scored copy on input 2 (from Detect Outlier (LOF)).
                       It collects the "ID" values of all rows whose "outlier" value is below 2 and returns the
                       rows of the original data with those IDs; drop = False keeps "ID" as a regular column.
                       NOTE(review): the threshold 2 is hard-coded in the script. Attribute roles are presumably
                       not carried through the script, which would explain the Set Role operator that follows. -->
                  <operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="Execute Python (3)" width="90" x="380" y="34">
                    <parameter key="script" value="import pandas&#10;&#10;# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none),&#10;#     or the number of input ports plus one if &quot;use macros&quot; parameter is set&#10;# if you want to use macros, use this instead and check &quot;use macros&quot; parameter:&#10;#def rm_main(data,macros):&#10;def rm_main(ori, norm):&#10;    ids = list(norm.loc[norm['outlier']&lt;2, 'ID'])&#10;    data = ori.set_index('ID', drop = False).loc[ids,:]&#10;    return data"/>
                    <parameter key="notebook_cell_tag_filter" value=""/>
                    <parameter key="use_default_python" value="true"/>
                    <parameter key="package_manager" value="conda (anaconda)"/>
                    <parameter key="use_macros" value="false"/>
                  </operator>
                  <operator activated="true" class="set_role" compatibility="9.9.000" expanded="true" height="82" name="Set Role (4)" width="90" x="581" y="34">
                    <parameter key="attribute_name" value="ID"/>
                    <parameter key="target_role" value="id"/>
                    <list key="set_additional_roles">
                      <parameter key="Target" value="label"/>
                      <parameter key="ID" value="id"/>
                    </list>
                  </operator>
                  <connect from_port="in 1" to_op="Multiply (3)" to_port="input"/>
                  <connect from_op="Multiply (3)" from_port="output 1" to_op="Execute Python (3)" to_port="input 1"/>
                  <connect from_op="Multiply (3)" from_port="output 2" to_op="Normalize (2)" to_port="example set input"/>
                  <connect from_op="Normalize (2)" from_port="example set output" to_op="Detect Outlier (LOF)" to_port="example set input"/>
                  <connect from_op="Detect Outlier (LOF)" from_port="example set output" to_op="Execute Python (3)" to_port="input 2"/>
                  <connect from_op="Execute Python (3)" from_port="output 1" to_op="Set Role (4)" to_port="example set input"/>
                  <connect from_op="Set Role (4)" from_port="example set output" to_port="out 1"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                </process>
              </operator>
              <!-- Remove Correlated Attributes: drops attributes whose absolute correlation is greater than
                   the "correlation" threshold.
                   The stored threshold 1.0 is only a placeholder: the enclosing grid optimizer "with Downsampling"
                   varies "Remove Correlated Attributes.correlation" over [0.4;1.0;6;linear].
                   NOTE(review): attribute_order is "random" without a local seed, so which attribute of a
                   correlated pair survives presumably depends on the process-level random seed. -->
              <operator activated="true" class="remove_correlated_attributes" compatibility="9.9.000" expanded="true" height="82" name="Remove Correlated Attributes" width="90" x="179" y="34">
                <parameter key="correlation" value="1.0"/>
                <parameter key="filter_relation" value="greater"/>
                <parameter key="attribute_order" value="random"/>
                <parameter key="use_absolute_correlation" value="true"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
              </operator>
              <!-- MRMR-FS: feature selection operator from the featselext extension, configured to return
                   attribute sets using the "quotient" relevance/redundancy relation and no ensemble method.
                   The stored k of 24 is only a placeholder: the enclosing grid optimizer "with Downsampling"
                   varies "MRMR-FS.k" over [10;24;7;linear]. -->
              <operator activated="true" class="featselext:mrmr_feature_selection" compatibility="1.1.004" expanded="true" height="82" name="MRMR-FS" width="90" x="313" y="34">
                <parameter key="normalize_weights" value="false"/>
                <parameter key="sort_weights" value="false"/>
                <parameter key="sort_direction" value="ascending"/>
                <parameter key="sets_or_ranks" value="sets"/>
                <parameter key="calculate full ranking" value="true"/>
                <parameter key="k" value="24"/>
                <parameter key="relevance_redundancy_relation" value="quotient"/>
                <parameter key="use_ensemble_method" value="none"/>
                <parameter key="ensemble_size" value="10"/>
                <parameter key="logging" value="false"/>
              </operator>
              <!-- Logistic Regression (H2O): trains the model on the attributes selected by MRMR-FS.
                   Configured WITHOUT regularization (use_regularization = false, no lambda search), with
                   standardization, an intercept, p-value computation and collinear-column removal enabled.
                   NOTE(review): this is the operator the reported hex.gram.Gram$NonSPDMatrixException most
                   likely comes from. The exception name suggests that the Gram matrix of the training data
                   could not be factorised as a symmetric positive definite matrix. Presumably this happens when
                   the selected attributes are (nearly) linearly dependent, constant within a fold, or numerous
                   relative to the training rows left after 4-fold splitting and outlier removal, and when no
                   regularization term stabilises the matrix. Things to try, one at a time: set
                   compute_p-values to false, set use_regularization to true, or lower the upper bounds of the
                   grid for MRMR-FS.k and the correlation threshold. TODO confirm against the H2O GLM
                   documentation; none of this is verified by the process file itself. -->
              <operator activated="true" class="h2o:logistic_regression" compatibility="9.9.000" expanded="true" height="124" name="Logistic Regression" width="90" x="447" y="34">
                <parameter key="solver" value="AUTO"/>
                <parameter key="reproducible" value="false"/>
                <parameter key="maximum_number_of_threads" value="4"/>
                <parameter key="use_regularization" value="false"/>
                <parameter key="lambda_search" value="false"/>
                <parameter key="number_of_lambdas" value="0"/>
                <parameter key="lambda_min_ratio" value="0.0"/>
                <parameter key="early_stopping" value="true"/>
                <parameter key="stopping_rounds" value="3"/>
                <parameter key="stopping_tolerance" value="0.001"/>
                <parameter key="standardize" value="true"/>
                <parameter key="non-negative_coefficients" value="false"/>
                <parameter key="add_intercept" value="true"/>
                <parameter key="compute_p-values" value="true"/>
                <parameter key="remove_collinear_columns" value="true"/>
                <parameter key="missing_values_handling" value="MeanImputation"/>
                <parameter key="max_iterations" value="0"/>
                <parameter key="max_runtime_seconds" value="0"/>
              </operator>
              <connect from_port="training set" to_op="Remove outliers (2)" to_port="in 1"/>
              <connect from_op="Remove outliers (2)" from_port="out 1" to_op="Remove Correlated Attributes" to_port="example set input"/>
              <connect from_op="Remove Correlated Attributes" from_port="example set output" to_op="MRMR-FS" to_port="example set"/>
              <connect from_op="MRMR-FS" from_port="example set" to_op="Logistic Regression" to_port="training set"/>
              <connect from_op="Logistic Regression" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="9.9.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <!-- CV-D: binominal classification performance for the branch with downsampling.
                   The positive class is set manually to "True"; kappa, AUC, precision and recall are enabled
                   and recall is this operator's main criterion.
                   Its performance vector and example set are passed on to Performance (AUPRC). -->
              <operator activated="true" class="performance_binominal_classification" compatibility="9.9.000" expanded="true" height="82" name="CV-D" width="90" x="179" y="34">
                <parameter key="manually_set_positive_class" value="true"/>
                <parameter key="positive_class" value="True"/>
                <parameter key="main_criterion" value="recall"/>
                <parameter key="accuracy" value="false"/>
                <parameter key="classification_error" value="false"/>
                <parameter key="kappa" value="true"/>
                <parameter key="AUC (optimistic)" value="false"/>
                <parameter key="AUC" value="true"/>
                <parameter key="AUC (pessimistic)" value="false"/>
                <parameter key="precision" value="true"/>
                <parameter key="recall" value="true"/>
                <parameter key="lift" value="false"/>
                <parameter key="fallout" value="false"/>
                <parameter key="f_measure" value="false"/>
                <parameter key="false_positive" value="false"/>
                <parameter key="false_negative" value="false"/>
                <parameter key="true_positive" value="false"/>
                <parameter key="true_negative" value="false"/>
                <parameter key="sensitivity" value="false"/>
                <parameter key="specificity" value="false"/>
                <parameter key="youden" value="false"/>
                <parameter key="positive_predictive_value" value="false"/>
                <parameter key="negative_predictive_value" value="false"/>
                <parameter key="psep" value="false"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <operator activated="true" class="operator_toolbox:performance_auprc" compatibility="2.9.000" expanded="true" height="82" name="Performance (AUPRC)" width="90" x="313" y="34">
                <parameter key="main_criterion" value="first"/>
                <parameter key="accuracy" value="false"/>
                <parameter key="AUC" value="false"/>
                <parameter key="AUPRC" value="true"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <!-- Performance (Fbeta-score): custom operator (class radiomics_test:my_own_operator) that
                   receives the performance vector from Performance (AUPRC), uses "True" as the positive class
                   and Beta = 2.0, and is configured to make the Fbeta-score the main criterion.
                   Its performance vector is the one delivered to the cross validation's "performance 1" port, so
                   this is presumably the value the surrounding grid optimizer maximises. -->
              <operator activated="true" class="radiomics_test:my_own_operator" compatibility="1.0.000" expanded="true" height="82" name="Performance (Fbeta-score)" width="90" x="447" y="34">
                <parameter key="Manually set positive class" value="true"/>
                <parameter key="Positive class" value="True"/>
                <parameter key="Make Fbeta-score the main criterion" value="true"/>
                <parameter key="Beta" value="2.0"/>
              </operator>
              <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (2)" from_port="labelled data" to_op="CV-D" to_port="labelled data"/>
              <connect from_op="CV-D" from_port="performance" to_op="Performance (AUPRC)" to_port="performance"/>
              <connect from_op="CV-D" from_port="example set" to_op="Performance (AUPRC)" to_port="labelled data"/>
              <connect from_op="Performance (AUPRC)" from_port="performance" to_op="Performance (Fbeta-score)" to_port="performance vector"/>
              <connect from_op="Performance (AUPRC)" from_port="example set" to_op="Performance (Fbeta-score)" to_port="labelled example set"/>
              <connect from_op="Performance (Fbeta-score)" from_port="performance vector" to_port="performance 1"/>
              <connect from_op="Performance (Fbeta-score)" from_port="labelled example set" to_port="test set results"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_test set results" spacing="0"/>
              <portSpacing port="sink_performance 1" spacing="0"/>
              <portSpacing port="sink_performance 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Cross Validation (2)" to_port="example set"/>
          <connect from_op="Cross Validation (2)" from_port="model" to_port="model"/>
          <connect from_op="Cross Validation (2)" from_port="test result set" to_port="output 1"/>
          <connect from_op="Cross Validation (2)" from_port="performance 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Store: writes its input object to the repository entry "../Models_mRMR/G_D_mRMR_LR" (a path relative
           to the process location). Which output is connected to it is defined by connect elements that are
           not part of this excerpt. -->
      <operator activated="true" class="store" compatibility="9.9.000" expanded="true" height="68" name="Store" width="90" x="1117" y="85">
        <parameter key="repository_entry" value="../Models_mRMR/G_D_mRMR_LR"/>
      </operator>
      <operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="9.9.000" expanded="true" height="145" name="without downsampling" width="90" x="715" y="187">
        <list key="parameters">
          <parameter key="Remove Correlated Attributes (2).correlation" value="[0.4;1.0;6;linear]"/>
          <parameter key="MRMR-FS (2).k" value="[10;24;7;linear]"/>
        </list>
        <parameter key="error_handling" value="fail on error"/>
        <parameter key="log_performance" value="true"/>
        <parameter key="log_all_criteria" value="false"/>
        <parameter key="synchronize" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="Cross Validation" width="90" x="45" y="34">
            <parameter key="split_on_batch_attribute" value="false"/>
            <parameter key="leave_one_out" value="false"/>
            <parameter key="number_of_folds" value="4"/>
            <parameter key="sampling_type" value="automatic"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Remove outliers" width="90" x="45" y="34">
                <process expanded="true">
                  <operator activated="true" class="multiply" compatibility="9.9.000" expanded="true" height="103" name="Multiply (4)" width="90" x="45" y="34"/>
                  <operator activated="true" class="normalize" compatibility="9.9.000" expanded="true" height="103" name="Normalize" width="90" x="112" y="187">
                    <parameter key="return_preprocessing_model" value="false"/>
                    <parameter key="create_view" value="false"/>
                    <parameter key="attribute_filter_type" value="all"/>
                    <parameter key="attribute" value=""/>
                    <parameter key="attributes" value=""/>
                    <parameter key="use_except_expression" value="false"/>
                    <parameter key="value_type" value="numeric"/>
                    <parameter key="use_value_type_exception" value="false"/>
                    <parameter key="except_value_type" value="real"/>
                    <parameter key="block_type" value="value_series"/>
                    <parameter key="use_block_type_exception" value="false"/>
                    <parameter key="except_block_type" value="value_series_end"/>
                    <parameter key="invert_selection" value="false"/>
                    <parameter key="include_special_attributes" value="false"/>
                    <parameter key="method" value="Z-transformation"/>
                    <parameter key="min" value="0.0"/>
                    <parameter key="max" value="1.0"/>
                    <parameter key="allow_negative_values" value="false"/>
                  </operator>
                  <!-- Detect Outlier (LOF) (2): scores the normalized copy of the training data (it is fed by
                       Normalize) using MinPts bounds 10 and 20 and the euclidean distance.
                       The Python script downstream expects an "outlier" column in this operator's output. -->
                  <operator activated="true" class="detect_outlier_lof" compatibility="9.9.000" expanded="true" height="82" name="Detect Outlier (LOF) (2)" width="90" x="246" y="187">
                    <parameter key="minimal_points_lower_bound" value="10"/>
                    <parameter key="minimal_points_upper_bound" value="20"/>
                    <parameter key="distance_function" value="euclidian distance"/>
                  </operator>
                  <!-- Execute Python (4): removes the outliers from the ORIGINAL (non-normalized) data.
                       rm_main(ori, norm) receives the untouched copy on input 1 (from Multiply (4)) and the
                       normalized, LOF-scored copy on input 2 (from Detect Outlier (LOF) (2)).
                       It collects the "ID" values of all rows whose "outlier" value is below 2 and returns the
                       rows of the original data with those IDs; drop = False keeps "ID" as a regular column.
                       NOTE(review): the threshold 2 is hard-coded in the script. Attribute roles are presumably
                       not carried through the script, which would explain the Set Role operator that follows. -->
                  <operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="Execute Python (4)" width="90" x="380" y="34">
                    <parameter key="script" value="import pandas&#10;&#10;# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none),&#10;#     or the number of input ports plus one if &quot;use macros&quot; parameter is set&#10;# if you want to use macros, use this instead and check &quot;use macros&quot; parameter:&#10;#def rm_main(data,macros):&#10;def rm_main(ori, norm):&#10;    ids = list(norm.loc[norm['outlier']&lt;2, 'ID'])&#10;    data = ori.set_index('ID', drop = False).loc[ids,:]&#10;    return data"/>
                    <parameter key="notebook_cell_tag_filter" value=""/>
                    <parameter key="use_default_python" value="true"/>
                    <parameter key="package_manager" value="conda (anaconda)"/>
                    <parameter key="use_macros" value="false"/>
                  </operator>
                  <operator activated="true" class="set_role" compatibility="9.9.000" expanded="true" height="82" name="Set Role (5)" width="90" x="581" y="34">
                    <parameter key="attribute_name" value="ID"/>
                    <parameter key="target_role" value="id"/>
                    <list key="set_additional_roles">
                      <parameter key="Target" value="label"/>
                      <parameter key="ID" value="id"/>
                    </list>
                  </operator>
                  <connect from_port="in 1" to_op="Multiply (4)" to_port="input"/>
                  <connect from_op="Multiply (4)" from_port="output 1" to_op="Execute Python (4)" to_port="input 1"/>
                  <connect from_op="Multiply (4)" from_port="output 2" to_op="Normalize" to_port="example set input"/>
                  <connect from_op="Normalize" from_port="example set output" to_op="Detect Outlier (LOF) (2)" to_port="example set input"/>
                  <connect from_op="Detect Outlier (LOF) (2)" from_port="example set output" to_op="Execute Python (4)" to_port="input 2"/>
                  <connect from_op="Execute Python (4)" from_port="output 1" to_op="Set Role (5)" to_port="example set input"/>
                  <connect from_op="Set Role (5)" from_port="example set output" to_port="out 1"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                </process>
              </operator>
              <!-- Remove Correlated Attributes (2): drops attributes whose absolute correlation is greater than
                   the "correlation" threshold.
                   The stored threshold 0.2 is only a placeholder and lies outside the optimized range: the
                   enclosing grid optimizer "without downsampling" varies
                   "Remove Correlated Attributes (2).correlation" over [0.4;1.0;6;linear].
                   NOTE(review): attribute_order is "random" without a local seed, so which attribute of a
                   correlated pair survives presumably depends on the process-level random seed. -->
              <operator activated="true" class="remove_correlated_attributes" compatibility="9.9.000" expanded="true" height="82" name="Remove Correlated Attributes (2)" width="90" x="179" y="34">
                <parameter key="correlation" value="0.2"/>
                <parameter key="filter_relation" value="greater"/>
                <parameter key="attribute_order" value="random"/>
                <parameter key="use_absolute_correlation" value="true"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
              </operator>
              <!-- MRMR-FS (2): feature selection operator from the featselext extension, configured to return
                   attribute sets using the "quotient" relevance/redundancy relation and no ensemble method.
                   The stored k of 100 is only a placeholder and lies outside the optimized range: the enclosing
                   grid optimizer "without downsampling" varies "MRMR-FS (2).k" over [10;24;7;linear]. -->
              <operator activated="true" class="featselext:mrmr_feature_selection" compatibility="1.1.004" expanded="true" height="82" name="MRMR-FS (2)" width="90" x="313" y="34">
                <parameter key="normalize_weights" value="false"/>
                <parameter key="sort_weights" value="false"/>
                <parameter key="sort_direction" value="ascending"/>
                <parameter key="sets_or_ranks" value="sets"/>
                <parameter key="calculate full ranking" value="true"/>
                <parameter key="k" value="100"/>
                <parameter key="relevance_redundancy_relation" value="quotient"/>
                <parameter key="use_ensemble_method" value="none"/>
                <parameter key="ensemble_size" value="10"/>
                <parameter key="logging" value="false"/>
              </operator>
              <!-- Logistic Regression (2) (H2O): trains the model on the attributes selected by MRMR-FS (2).
                   Configured WITHOUT regularization (use_regularization = false, no lambda search), with
                   standardization, an intercept, p-value computation and collinear-column removal enabled.
                   NOTE(review): a likely source of the reported hex.gram.Gram$NonSPDMatrixException. The
                   exception name suggests that the Gram matrix of the training data could not be factorised as
                   a symmetric positive definite matrix. Presumably this happens when the selected attributes
                   are (nearly) linearly dependent or constant within a fold, and when no regularization term
                   stabilises the matrix. Things to try, one at a time: set compute_p-values to false, set
                   use_regularization to true, or lower the upper bounds of the grid for MRMR-FS (2).k and the
                   correlation threshold. TODO confirm against the H2O GLM documentation; none of this is
                   verified by the process file itself. -->
              <operator activated="true" class="h2o:logistic_regression" compatibility="9.9.000" expanded="true" height="124" name="Logistic Regression (2)" width="90" x="581" y="34">
                <parameter key="solver" value="AUTO"/>
                <parameter key="reproducible" value="false"/>
                <parameter key="maximum_number_of_threads" value="4"/>
                <parameter key="use_regularization" value="false"/>
                <parameter key="lambda_search" value="false"/>
                <parameter key="number_of_lambdas" value="0"/>
                <parameter key="lambda_min_ratio" value="0.0"/>
                <parameter key="early_stopping" value="true"/>
                <parameter key="stopping_rounds" value="3"/>
                <parameter key="stopping_tolerance" value="0.001"/>
                <parameter key="standardize" value="true"/>
                <parameter key="non-negative_coefficients" value="false"/>
                <parameter key="add_intercept" value="true"/>
                <parameter key="compute_p-values" value="true"/>
                <parameter key="remove_collinear_columns" value="true"/>
                <parameter key="missing_values_handling" value="MeanImputation"/>
                <parameter key="max_iterations" value="0"/>
                <parameter key="max_runtime_seconds" value="0"/>
              </operator>
              <connect from_port="training set" to_op="Remove outliers" to_port="in 1"/>
              <connect from_op="Remove outliers" from_port="out 1" to_op="Remove Correlated Attributes (2)" to_port="example set input"/>
              <connect from_op="Remove Correlated Attributes (2)" from_port="example set output" to_op="MRMR-FS (2)" to_port="example set"/>
              <connect from_op="MRMR-FS (2)" from_port="example set" to_op="Logistic Regression (2)" to_port="training set"/>
              <connect from_op="Logistic Regression (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="9.9.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <!-- CV-nD: binominal classification performance for the branch without downsampling.
                   The positive class is set manually to "True"; kappa, AUC, precision and recall are enabled
                   and recall is this operator's main criterion.
                   Its performance vector and example set are passed on to Performance (AUPRC) (2). -->
              <operator activated="true" class="performance_binominal_classification" compatibility="9.9.000" expanded="true" height="82" name="CV-nD" width="90" x="179" y="34">
                <parameter key="manually_set_positive_class" value="true"/>
                <parameter key="positive_class" value="True"/>
                <parameter key="main_criterion" value="recall"/>
                <parameter key="accuracy" value="false"/>
                <parameter key="classification_error" value="false"/>
                <parameter key="kappa" value="true"/>
                <parameter key="AUC (optimistic)" value="false"/>
                <parameter key="AUC" value="true"/>
                <parameter key="AUC (pessimistic)" value="false"/>
                <parameter key="precision" value="true"/>
                <parameter key="recall" value="true"/>
                <parameter key="lift" value="false"/>
                <parameter key="fallout" value="false"/>
                <parameter key="f_measure" value="false"/>
                <parameter key="false_positive" value="false"/>
                <parameter key="false_negative" value="false"/>
                <parameter key="true_positive" value="false"/>
                <parameter key="true_negative" value="false"/>
                <parameter key="sensitivity" value="false"/>
                <parameter key="specificity" value="false"/>
                <parameter key="youden" value="false"/>
                <parameter key="positive_predictive_value" value="false"/>
                <parameter key="negative_predictive_value" value="false"/>
                <parameter key="psep" value="false"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <!-- Performance (AUPRC) (2): extension operator (class operator_toolbox:performance_auprc, compatibility 2.9.000). Only the AUPRC criterion is enabled; accuracy and AUC are off. Per the connect elements of this subprocess it takes both the performance vector and the example set from CV-nD. -->
              <operator activated="true" class="operator_toolbox:performance_auprc" compatibility="2.9.000" expanded="true" height="82" name="Performance (AUPRC) (2)" width="90" x="313" y="34">
                <!-- NOTE(review): "first" presumably means the first enabled criterion becomes the main one; confirm against the extension documentation. -->
                <parameter key="main_criterion" value="first"/>
                <parameter key="accuracy" value="false"/>
                <parameter key="AUC" value="false"/>
                <parameter key="AUPRC" value="true"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <!-- Performance (Fbeta-score) (3): operator from a custom extension (class radiomics_test:my_own_operator, compatibility 1.0.000). Configured with positive class "True", Beta 2.0, and the Fbeta-score flagged as main criterion. NOTE(review): the operator implementation is not part of this process file; its exact computation must be confirmed in the extension source. -->
              <operator activated="true" class="radiomics_test:my_own_operator" compatibility="1.0.000" expanded="true" height="82" name="Performance (Fbeta-score) (3)" width="90" x="447" y="34">
                <parameter key="Manually set positive class" value="true"/>
                <parameter key="Positive class" value="True"/>
                <parameter key="Make Fbeta-score the main criterion" value="true"/>
                <parameter key="Beta" value="2.0"/>
              </operator>
              <!-- Wiring of this subprocess: the "model" and "test set" inputs go to Apply Model; its labelled data goes to CV-nD; CV-nD passes performance and example set to Performance (AUPRC) (2), which passes both on to Performance (Fbeta-score) (3). The final performance vector leaves on "performance 1" and the labelled example set on "test set results". -->
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="CV-nD" to_port="labelled data"/>
              <connect from_op="CV-nD" from_port="performance" to_op="Performance (AUPRC) (2)" to_port="performance"/>
              <connect from_op="CV-nD" from_port="example set" to_op="Performance (AUPRC) (2)" to_port="labelled data"/>
              <connect from_op="Performance (AUPRC) (2)" from_port="performance" to_op="Performance (Fbeta-score) (3)" to_port="performance vector"/>
              <connect from_op="Performance (AUPRC) (2)" from_port="example set" to_op="Performance (Fbeta-score) (3)" to_port="labelled example set"/>
              <connect from_op="Performance (Fbeta-score) (3)" from_port="performance vector" to_port="performance 1"/>
              <connect from_op="Performance (Fbeta-score) (3)" from_port="labelled example set" to_port="test set results"/>
              <!-- Layout-only port spacing entries; "source_through 1" and "sink_performance 2" have no connect element in this subprocess. -->
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_test set results" spacing="0"/>
              <portSpacing port="sink_performance 1" spacing="0"/>
              <portSpacing port="sink_performance 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Cross Validation" to_port="example set"/>
          <connect from_op="Cross Validation" from_port="model" to_port="model"/>
          <connect from_op="Cross Validation" from_port="test result set" to_port="output 1"/>
          <connect from_op="Cross Validation" from_port="performance 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Store (2): writes its input to the repository entry given below (a path relative to this process). Per the top-level connect elements, the input is the "model" output of the "without downsampling" operator. -->
      <operator activated="true" class="store" compatibility="9.9.000" expanded="true" height="68" name="Store (2)" width="90" x="1050" y="238">
        <parameter key="repository_entry" value="../Models_mRMR/G_nD_mRMR_LR"/>
      </operator>
      <!-- DeLong Test (AUPRC) (3): Execute Python operator. Per the top-level connect elements, "input 1" is "output 1" of "with Downsampling" and "input 2" is "output 1" of "without downsampling"; they arrive as dataA and dataB in rm_main.
           The script (newlines encoded as &#10; in the attribute) reads the columns 'prediction(Target)' and 'Target' from both inputs, treats the label 'True' as positive, computes an area under the precision-recall curve for each input with sklearn (precision_recall_curve + auc), derives variance and covariance terms from the structural components, and prints and returns a two-tailed p-value from the normal distribution.
           NOTE(review): precision_recall_curve is fed 0/1 values derived from the predicted label, not a confidence score; confirm this is intended.
           NOTE(review): get_S_entry pairs the two component vectors with zip, which stops at the shorter one; the covariance term therefore assumes both inputs hold the same examples in the same order. Confirm, since the operator names suggest one input is downsampled.
           NOTE(review): z_score divides by the square root of (var_A + var_B - 2*covar_AB) with no guard for a zero or negative value. -->
      <operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="DeLong Test (AUPRC) (3)" width="90" x="916" y="289">
        <parameter key="script" value="import pandas&#10;import scipy.stats as st&#10;from sklearn import metrics&#10;from sklearn.metrics import precision_recall_curve&#10;from sklearn.metrics import auc&#10;&#10;def kernel(X, Y):&#10;    return .5 if Y==X else int(Y &lt; X)&#10;def structural_components(X, Y):&#10;    V10 = [1/len(Y) * sum([kernel(x, y) for y in Y]) for x in X]&#10;    V01 = [1/len(X) * sum([kernel(x, y) for x in X]) for y in Y]&#10;    return V10, V01&#10;def get_S_entry(V_A, V_B, auc_A, auc_B):&#10;    return 1/(len(V_A)-1) * sum([(a-auc_A)*(b-auc_B) for a,b in zip(V_A, V_B)])&#10;def z_score(var_A, var_B, covar_AB, auc_A, auc_B):&#10;    return (auc_A - auc_B)/((var_A + var_B - 2*covar_AB)**(.5))&#10;def group_preds_by_label(preds, actual):&#10;    X = [p for (p, a) in zip(preds, actual) if a=='True']&#10;    Y = [p for (p, a) in zip(preds, actual) if not a=='True']&#10;    return X, Y&#10;&#10;def rm_main(dataA, dataB):&#10;    preds_A = dataA.loc[:, 'prediction(Target)']&#10;    preds_B = dataB.loc[:, 'prediction(Target)']&#10;    actual_A = dataA.loc[:, 'Target']&#10;    actual_B = dataB.loc[:, 'Target']&#10;    &#10;    X_A, Y_A = group_preds_by_label(preds_A, actual_A)&#10;    X_B, Y_B = group_preds_by_label(preds_B, actual_B)&#10;    V_A10, V_A01 = structural_components(X_A, Y_A)&#10;    V_B10, V_B01 = structural_components(X_B, Y_B)&#10;    &#10;    a_A = [1 if elem == 'True' else 0 for elem in actual_A]&#10;    a_B = [1 if elem == 'True' else 0 for elem in actual_B]&#10;    p_A = [1 if elem == 'True' else 0 for elem in preds_A]&#10;    p_B = [1 if elem == 'True' else 0 for elem in preds_B]&#10;    precision_A, recall_A, thresholds_A = precision_recall_curve(a_A, p_A)&#10;    auc_A = auc(recall_A, precision_A)&#10;    precision_B, recall_B, thresholds_B = precision_recall_curve(a_B, p_B)&#10;    auc_B = auc(recall_B, precision_B)&#10;    &#10;    # Compute entries of covariance matrix S (covar_AB = covar_BA)&#10;    var_A = 
(get_S_entry(V_A10, V_A10, auc_A, auc_A) * 1/len(V_A10)&#10;             + get_S_entry(V_A01, V_A01, auc_A, auc_A) * 1/len(V_A01))&#10;    var_B = (get_S_entry(V_B10, V_B10, auc_B, auc_B) * 1/len(V_B10)&#10;             + get_S_entry(V_B01, V_B01, auc_B, auc_B) * 1/len(V_B01))&#10;    covar_AB = (get_S_entry(V_A10, V_B10, auc_A, auc_B) * 1/len(V_A10)&#10;                + get_S_entry(V_A01, V_B01, auc_A, auc_B) * 1/len(V_A01))&#10;    # Two tailed test&#10;    z = z_score(var_A, var_B, covar_AB, auc_A, auc_B)&#10;    p = st.norm.sf(abs(z))*2&#10;    print('Is AUPRC_A significantly different from AUPRC_B?')&#10;    print('CV p-value:', p)&#10;    return p"/>
        <parameter key="notebook_cell_tag_filter" value=""/>
        <parameter key="use_default_python" value="true"/>
        <parameter key="package_manager" value="conda (anaconda)"/>
        <parameter key="use_macros" value="false"/>
      </operator>
      <connect from_op="Read train (2)" from_port="output" to_op="Remove missing data" to_port="example set input"/>
      <connect from_op="Remove missing data" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Sample (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="without downsampling" to_port="input 1"/>
      <connect from_op="Sample (3)" from_port="example set output" to_op="with Downsampling" to_port="input 1"/>
      <connect from_op="with Downsampling" from_port="performance" to_port="result 1"/>
      <connect from_op="with Downsampling" from_port="model" to_op="Store" to_port="input"/>
      <connect from_op="with Downsampling" from_port="output 1" to_op="DeLong Test (AUPRC) (3)" to_port="input 1"/>
      <connect from_op="without downsampling" from_port="performance" to_port="result 2"/>
      <connect from_op="without downsampling" from_port="model" to_op="Store (2)" to_port="input"/>
      <connect from_op="without downsampling" from_port="output 1" to_op="DeLong Test (AUPRC) (3)" to_port="input 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>


Thank you in advance,
Ana
Tagged:

Best Answer

  • Options
    anaRodriguesanaRodrigues Member Posts: 33 Contributor II
    Solution Accepted
    Hi Jonas,

    Thank you for your answer. Yes, that cleared the error, but now the process is freezing and keeps logging the same statement over and over again.

    Apr 16, 2021 11:02:26 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.693 imp=.1E1 bdf=.28E-1
    Apr 16, 2021 11:02:30 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.693 imp=.1E1 bdf=.28E-1
    Apr 16, 2021 11:02:31 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.6927 imp=.1E1 bdf=.57E-1
    Apr 16, 2021 11:02:31 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.693 imp=.1E1 bdf=.28E-1
    Apr 16, 2021 11:02:35 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.693 imp=.1E1 bdf=.28E-1
    Apr 16, 2021 11:02:36 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.6927 imp=.1E1 bdf=.57E-1
    Apr 16, 2021 11:02:36 AM INFO: H2O: 2% - iter=0 lmb=.0E0 obj=0.693 imp=.1E1 bdf=.28E-1


    Greetings,
    Ana

Answers

  • Options
    jwpfaujwpfau Employee, Member Posts: 277 RM Engineering
    Hi Ana,

    does it help to enable the "use regularization" parameter?

    That's at least suggested by this Stack Overflow post.

    Greetings,
    Jonas
Sign In or Register to comment.