🥳 RAPIDMINER 9.9 IS OUT!!! 🥳

The updates in 9.9 power advanced use cases and offer productivity enhancements for users who prefer to code.

CLICK HERE TO DOWNLOAD

Predict multiple value based on another value

legeithienlegeithien Member Posts: 3 Learner I
Hello guys,

so, i had this data which represents the contents of soil in x-depth and the data only has limited depth. i tried to make model based on contents of soil in x-depth which going to predict those contents of soil arent shown on the depth. e.g contents of soil in 3 m depths and make this data to predict contents of soil in 5m depth

i tried using impute missing values operator to estimate those missing values of contents and using naive bayes, k-NN. i was going to use at least 3 predictions but every time i tried it kept getting errors, so im only using those two for now.

<?xml version="1.0" encoding="UTF-8"?><process version="9.9.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.9.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="9.9.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\Avengers\Documents\RapidMiner\Local Repository\fyp\another part1.xlsx"/>
        <parameter key="sheet_selection" value="sheet number"/>
        <parameter key="sheet_number" value="1"/>
        <parameter key="imported_cell_range" value="A1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="true"/>
        <list key="annotations"/>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Depth (m).true.real.attribute"/>
          <parameter key="1" value="Gravel.true.integer.attribute"/>
          <parameter key="2" value="Sand.true.real.attribute"/>
          <parameter key="3" value="Silt.true.real.attribute"/>
          <parameter key="4" value="Clay.true.integer.attribute"/>
          <parameter key="5" value="LL.true.integer.attribute"/>
          <parameter key="6" value="PL.true.integer.attribute"/>
          <parameter key="7" value="PI.true.integer.attribute"/>
          <parameter key="8" value="Moisture Content (%).true.real.attribute"/>
          <parameter key="9" value="Bulk Density (Mg/m3).true.real.attribute"/>
          <parameter key="10" value="Dry Density (Mg/m3).true.real.attribute"/>
          <parameter key="11" value="Specific Gravity.true.real.attribute"/>
          <parameter key="12" value="SPT (N).true.integer.attribute"/>
          <parameter key="13" value="Type of Soil.true.polynominal.attribute"/>
          <parameter key="14" value="Soil Condition.true.polynominal.attribute"/>
          <parameter key="15" value="Resistivity (Ohm\.m).true.real.attribute"/>
          <parameter key="16" value="Seismic, Vp (km/s).true.real.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="true"/>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Subprocess" width="90" x="179" y="34">
        <process expanded="true">
          <operator activated="true" class="select_attributes" compatibility="9.9.000" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.9.000" expanded="true" height="82" name="Set Role" width="90" x="179" y="34">
            <parameter key="attribute_name" value="Type of Soil"/>
            <parameter key="target_role" value="prediction"/>
            <list key="set_additional_roles">
              <parameter key="Type of Soil" value="label"/>
            </list>
          </operator>
          <operator activated="true" class="impute_missing_values" compatibility="9.9.000" expanded="true" height="68" name="Impute Missing Values" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="iterate" value="true"/>
            <parameter key="learn_on_complete_cases" value="true"/>
            <parameter key="order" value="chronological"/>
            <parameter key="sort" value="ascending"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <process expanded="true">
              <operator activated="true" class="k_nn" compatibility="9.9.000" expanded="true" height="82" name="k-NN" width="90" x="179" y="34">
                <parameter key="k" value="9"/>
                <parameter key="weighted_vote" value="true"/>
                <parameter key="measure_types" value="MixedMeasures"/>
                <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
                <parameter key="nominal_measure" value="NominalDistance"/>
                <parameter key="numerical_measure" value="EuclideanDistance"/>
                <parameter key="divergence" value="GeneralizedIDivergence"/>
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="1.0"/>
                <parameter key="kernel_sigma1" value="1.0"/>
                <parameter key="kernel_sigma2" value="0.0"/>
                <parameter key="kernel_sigma3" value="2.0"/>
                <parameter key="kernel_degree" value="3.0"/>
                <parameter key="kernel_shift" value="1.0"/>
                <parameter key="kernel_a" value="1.0"/>
                <parameter key="kernel_b" value="0.0"/>
              </operator>
              <connect from_port="example set source" to_op="k-NN" to_port="training set"/>
              <connect from_op="k-NN" from_port="model" to_port="model sink"/>
              <portSpacing port="source_example set source" spacing="0"/>
              <portSpacing port="sink_model sink" spacing="0"/>
            </process>
          </operator>
          <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Impute Missing Values" to_port="example set in"/>
          <connect from_op="Impute Missing Values" from_port="example set out" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="multiply" compatibility="9.9.000" expanded="true" height="103" name="Multiply" width="90" x="313" y="34"/>
      <operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="k-NN (3)" width="90" x="447" y="187">
        <parameter key="split_on_batch_attribute" value="false"/>
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_folds" value="10"/>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="9.9.000" expanded="true" height="82" name="k-NN (2)" width="90" x="246" y="34">
            <parameter key="k" value="5"/>
            <parameter key="weighted_vote" value="true"/>
            <parameter key="measure_types" value="MixedMeasures"/>
            <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
            <parameter key="nominal_measure" value="NominalDistance"/>
            <parameter key="numerical_measure" value="EuclideanDistance"/>
            <parameter key="divergence" value="GeneralizedIDivergence"/>
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="1.0"/>
            <parameter key="kernel_sigma1" value="1.0"/>
            <parameter key="kernel_sigma2" value="0.0"/>
            <parameter key="kernel_sigma3" value="2.0"/>
            <parameter key="kernel_degree" value="3.0"/>
            <parameter key="kernel_shift" value="1.0"/>
            <parameter key="kernel_a" value="1.0"/>
            <parameter key="kernel_b" value="0.0"/>
          </operator>
          <connect from_port="training set" to_op="k-NN (2)" to_port="training set"/>
          <connect from_op="k-NN (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.9.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="9.9.000" expanded="true" height="82" name="k-NN (4)" width="90" x="179" y="34">
            <parameter key="main_criterion" value="accuracy"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="false"/>
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="false"/>
            <parameter key="weighted_mean_precision" value="false"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="absolute_error" value="false"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_mean_squared_error" value="true"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="cross-entropy" value="false"/>
            <parameter key="margin" value="true"/>
            <parameter key="soft_margin_loss" value="false"/>
            <parameter key="logistic_loss" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="k-NN (4)" to_port="labelled data"/>
          <connect from_op="k-NN (4)" from_port="performance" to_port="performance 1"/>
          <connect from_op="k-NN (4)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="Naive Buyes" width="90" x="447" y="34">
        <parameter key="split_on_batch_attribute" value="false"/>
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_folds" value="10"/>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="naive_bayes" compatibility="9.9.000" expanded="true" height="82" name="Naive Bayes" width="90" x="246" y="34">
            <parameter key="laplace_correction" value="true"/>
          </operator>
          <connect from_port="training set" to_op="Naive Bayes" to_port="training set"/>
          <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.9.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="9.9.000" expanded="true" height="82" name="Naive Bayes (2)" width="90" x="179" y="34">
            <parameter key="main_criterion" value="accuracy"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="false"/>
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="false"/>
            <parameter key="weighted_mean_precision" value="false"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="absolute_error" value="false"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_mean_squared_error" value="true"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="cross-entropy" value="false"/>
            <parameter key="margin" value="true"/>
            <parameter key="soft_margin_loss" value="false"/>
            <parameter key="logistic_loss" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Naive Bayes (2)" to_port="labelled data"/>
          <connect from_op="Naive Bayes (2)" from_port="performance" to_port="performance 1"/>
          <connect from_op="Naive Bayes (2)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Subprocess" to_port="in 1"/>
      <connect from_op="Subprocess" from_port="out 1" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Naive Buyes" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="k-NN (3)" to_port="example set"/>
      <connect from_op="k-NN (3)" from_port="model" to_port="result 4"/>
      <connect from_op="k-NN (3)" from_port="test result set" to_port="result 6"/>
      <connect from_op="k-NN (3)" from_port="performance 1" to_port="result 5"/>
      <connect from_op="Naive Buyes" from_port="model" to_port="result 1"/>
      <connect from_op="Naive Buyes" from_port="test result set" to_port="result 3"/>
      <connect from_op="Naive Buyes" from_port="performance 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>

my question is, is it correct? or is there any methods that i could tried to achieve better results?

Thanks in advance!

Answers

  • legeithienlegeithien Member Posts: 3 Learner I
    please.. anyone?
  • mschmitzmschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 2,937  RM Data Scientist
    I think you should consider to treat this as a time series problem, only that your 'time' is depth.

    Best,
    Martin
    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
  • legeithienlegeithien Member Posts: 3 Learner I
    edited May 4
    @mschmitz

    will try to do that and give the feedback, thanks for responding it :smile:

    edit: after i look further into it, i dont think it was a suitable method for my data because my data has a time series of "depth" but i need to look for the content of soil on that depth which the depth doesnt have to be a series. what i was looking for is how do i predict the content of that depth


Sign In or Register to comment.