Correct multiple records

u1111482u1111482 Member Posts: 1 Newbie
I am a newbie to RapidMiner and this is my first question.

I have an hourly time series dataset in which I need to correct an attribute multiple times.

DataSet is Date_time (dd/mm/yy hh:mm), Holiday (polynominal/name of holiday), temperature (kelvin)

The error: the holiday is only applied to the first record of the day (e.g. 25/12/18 12:00am) rather than to every hour of the day. I need to correct the data set so that for each hour of the day the record has the holiday associated with it.

eg

25/12/18 12:00am = Christmas
25/12/18 1:00am = None
25/12/18 2:00am = None

I can generate a subset of the holidays, but I can't work out how to correct all the incorrect examples.

Any guidance on how to approach this would be fantastic.

thanks
Jeff

Answers

  • tftemmetftemme Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, RMResearcher, Member Posts: 102  RM Research
    edited August 14
    Hi Jeff,

    If you always have 24 Examples per day (so no holes in your hourly data), you could use the Process Windows operator to divide your data into days and then Replace Missing Values (Series) to replace the missing entries with the previous value.

    See the example process below.
    Hope this helps
    Fabian

    <process version="9.4.000-BETA2">
      <!-- Example process: builds an hourly date series, marks a single example as a holiday,
           then fills the remaining missing Holiday values one 24-example window at a time
           and appends the windows back into one example set. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000-BETA2" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Demo data: 120 examples generated as a date series for the "Date" attribute.
               The interval entry "2019-12-23 00:00:00.1.hour" presumably means: start at
               2019-12-23 00:00:00 with a step of 1 hour (confirm against the operator docs). -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.4.000-BETA2" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="34">
            <parameter key="generator_type" value="date series"/>
            <parameter key="number_of_examples" value="120"/>
            <parameter key="use_stepsize" value="true"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)">
              <parameter key="Date" value="2019-12-23 00:00:00.1.hour"/>
            </list>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Adds the Holiday attribute: "Christmas" only where Date equals 2019/12/25 00:00:00,
               MISSING_NOMINAL for every other example, so only one example carries the holiday name.
               keep_all=true keeps the existing attributes alongside the new one. -->
          <operator activated="true" class="generate_attributes" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
            <list key="function_descriptions">
              <parameter key="Holiday" value="if(Date==date_parse_custom(&quot;2019/12/25 00:00:00&quot;,&quot;yyyy/MM/dd HH:mm:ss&quot;),&quot;Christmas&quot;,MISSING_NOMINAL)"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- Splits the series into windows of 24 examples (window_size=24, no_overlapping_windows=true)
               and passes each window to the inner process below.
               NOTE(review): with gap-free hourly data one window presumably matches one calendar day,
               which relies on the series starting at midnight; verify for real data. -->
          <operator activated="true" class="time_series:process_windows" compatibility="9.4.000-SNAPSHOT" expanded="true" height="82" name="Process Windows" width="90" x="514" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="has_indices" value="false"/>
            <parameter key="indices_attribute" value=""/>
            <parameter key="window_size" value="24"/>
            <parameter key="no_overlapping_windows" value="true"/>
            <parameter key="step_size" value="1"/>
            <parameter key="create_horizon_(labels)" value="false"/>
            <parameter key="horizon_attribute" value=""/>
            <parameter key="horizon_size" value="1"/>
            <parameter key="horizon_offset" value="0"/>
            <parameter key="add_last_index_in_window_attribute" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Inside the current window: works on the single attribute "Holiday" and replaces
                   missing nominal values using the "previous value" strategy.
                   overwrite_attributes=true, so presumably no "_cleaned" copy is created (confirm). -->
              <operator activated="true" class="time_series:replace_missing_values" compatibility="9.4.000-SNAPSHOT" expanded="true" height="68" name="Replace Missing Values (Series)" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="Holiday"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="has_indices" value="false"/>
                <parameter key="indices_attribute" value=""/>
                <parameter key="overwrite_attributes" value="true"/>
                <parameter key="new_attributes_postfix" value="_cleaned"/>
                <parameter key="replace_type_numerical" value="previous value"/>
                <parameter key="replace_type_nominal" value="previous value"/>
                <parameter key="replace_type_date_time" value="previous value"/>
                <parameter key="replace_value_numerical" value="0.0"/>
                <parameter key="replace_value_nominal" value="unknown"/>
                <parameter key="skip_other_missings" value="true"/>
                <parameter key="replace_infinity" value="true"/>
                <parameter key="replace_empty_strings" value="true"/>
                <parameter key="ensure_finite_values" value="false"/>
              </operator>
              <!-- Wiring of the inner process: window in, cleaned window out on "output 1". -->
              <connect from_port="windowed example set" to_op="Replace Missing Values (Series)" to_port="example set"/>
              <connect from_op="Replace Missing Values (Series)" from_port="example set" to_port="output 1"/>
              <portSpacing port="source_windowed example set" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Receives "output 1" of Process Windows and delivers one merged example set. -->
          <operator activated="true" class="append" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Append" width="90" x="648" y="34">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <!-- Top-level wiring: Create ExampleSet, Generate Attributes, Process Windows, Append, result 1. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Process Windows" to_port="example set"/>
          <connect from_op="Process Windows" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


    Tghadially
  • BalazsBaranyBalazsBarany Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert Posts: 281   Unicorn
Sign In or Register to comment.