Options

How can I get high f-measure for predicting a model?

incle17incle17 Member Posts: 1 Newbie
Hi. I tried using Naive Bayes inside a split validation together with Sample (Bootstrapping), but the F-measure I'm getting is very low. I'm expecting an F-measure between 85 and 99, but I keep getting values between 10 and 30. What am I doing wrong? I even played with the split ratio, but I'm getting the same results.

Answers

  • Options
    yyhuangyyhuang Administrator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    edited April 2019
    Hi @incle17,

    You may need to run an optimization for hyper-parameter tuning with another model, since Naive Bayes does not support any parameters.

    F-measure is the harmonic mean of precision and recall (sensitivity), so it can be very low if you have imbalanced data.

    The attached process gives an example that searches for the best values of gamma and C in an SVM to achieve a higher F-measure:

    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
      <!-- RapidMiner process: runs a grid optimization over the SVM parameters C and gamma
           on the "Weighting" sample data, with f_measure as the main performance criterion. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Retrieve: loads the repository entry //Samples/data/Weighting as the input data. -->
          <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Weighting" origin="GENERATED_TUTORIAL" width="90" x="112" y="30">
            <parameter key="repository_entry" value="//Samples/data/Weighting"/>
          </operator>
          <!-- Optimize Parameters (Grid): varies SVM.C and SVM.gamma of the nested "SVM" operator.
               NOTE(review): the value strings look like [min;max;steps;scale]; confirm against the operator documentation. -->
          <operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="8.2.000" expanded="true" height="124" name="Optimize Parameters (Grid)" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
            <list key="parameters">
              <parameter key="SVM.C" value="[0.001;100000;10;logarithmic]"/>
              <parameter key="SVM.gamma" value="[0.001;1.5;10;logarithmic]"/>
            </list>
            <parameter key="error_handling" value="fail on error"/>
            <parameter key="log_performance" value="true"/>
            <parameter key="log_all_criteria" value="true"/>
            <parameter key="synchronize" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Split Data: two partitions with ratio 0.5 each, using the fixed local random seed 1992. -->
              <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" origin="GENERATED_TUTORIAL" width="90" x="45" y="30">
                <enumeration key="partitions">
                  <parameter key="ratio" value="0.5"/>
                  <parameter key="ratio" value="0.5"/>
                </enumeration>
                <parameter key="sampling_type" value="automatic"/>
                <parameter key="use_local_random_seed" value="true"/>
                <parameter key="local_random_seed" value="1992"/>
              </operator>
              <!-- SVM (LibSVM): C-SVC with an rbf kernel. The gamma and C entries below are the two
                   parameters named in the grid list of the enclosing operator; presumably the stored
                   values are replaced for each grid combination (confirm). -->
              <operator activated="true" class="support_vector_machine_libsvm" compatibility="9.2.001" expanded="true" height="82" name="SVM" origin="GENERATED_TUTORIAL" width="90" x="179" y="85">
                <parameter key="svm_type" value="C-SVC"/>
                <parameter key="kernel_type" value="rbf"/>
                <parameter key="degree" value="3"/>
                <parameter key="gamma" value="1.5"/>
                <parameter key="coef0" value="0.0"/>
                <parameter key="C" value="100000.0"/>
                <parameter key="nu" value="0.5"/>
                <parameter key="cache_size" value="80"/>
                <parameter key="epsilon" value="0.001"/>
                <parameter key="p" value="0.1"/>
                <list key="class_weights"/>
                <parameter key="shrinking" value="true"/>
                <parameter key="calculate_confidences" value="false"/>
                <parameter key="confidence_for_multiclass" value="true"/>
              </operator>
              <!-- Apply Model: applies the SVM model to the partition that was not used for training. -->
              <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" origin="GENERATED_TUTORIAL" width="90" x="447" y="30">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <!-- Performance (Binominal Classification): f_measure is the main criterion;
                   accuracy, f_measure and sensitivity are the criteria switched on. -->
              <operator activated="true" class="performance_binominal_classification" compatibility="9.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="782" y="34">
                <parameter key="main_criterion" value="f_measure"/>
                <parameter key="accuracy" value="true"/>
                <parameter key="classification_error" value="false"/>
                <parameter key="kappa" value="false"/>
                <parameter key="AUC (optimistic)" value="false"/>
                <parameter key="AUC" value="false"/>
                <parameter key="AUC (pessimistic)" value="false"/>
                <parameter key="precision" value="false"/>
                <parameter key="recall" value="false"/>
                <parameter key="lift" value="false"/>
                <parameter key="fallout" value="false"/>
                <parameter key="f_measure" value="true"/>
                <parameter key="false_positive" value="false"/>
                <parameter key="false_negative" value="false"/>
                <parameter key="true_positive" value="false"/>
                <parameter key="true_negative" value="false"/>
                <parameter key="sensitivity" value="true"/>
                <parameter key="specificity" value="false"/>
                <parameter key="youden" value="false"/>
                <parameter key="positive_predictive_value" value="false"/>
                <parameter key="negative_predictive_value" value="false"/>
                <parameter key="psep" value="false"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <!-- Inner wiring: partition 2 trains the SVM, partition 1 is scored by Apply Model,
                   and the resulting performance is delivered to the optimizer's performance port. -->
              <connect from_port="input 1" to_op="Split Data" to_port="example set"/>
              <connect from_op="Split Data" from_port="partition 1" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Split Data" from_port="partition 2" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
              <connect from_op="Performance (2)" from_port="performance" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <!-- Outer wiring: the retrieved data feeds the optimizer; its performance and
               parameter set outputs become process results 1 and 2. -->
          <connect from_op="Weighting" from_port="output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="parameter set" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="18"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    



    YY
Sign In or Register to comment.