Options

Finding Frequency of a datatype

aoneilaoneil Member Posts: 9 Contributor II
edited November 2018 in Help
I'm working with data that is essentially a bunch of timestamps for checkins. I've separated the dates into separate variables for month, day, hour but for the past hour I've been trying (to no avail) to basically convert this data into a format similar to what is displayed on a histogram.

Currently my data looks similar to this:

Month | Day | Hour
1        |1      | 1
1        |1      | 1
1        |1      | 1
1        |1      | 4
1        |1      | 4
1        |1      | 5

How do I convert it into data similar to this:

Month | Day | Hour | Frequency
1        |1      |1        | 3
1        |1      |2        | 0
1        |1      |3        | 0
1        |1      |4        | 2
1        |1      |5        | 1

Any help will be much appreciated, thank you so much :)

Answers

  • Options
    JEdwardJEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
    This should do it.  I'm sure there are more efficient ways than this, but it's 1am here and I'm a little tired. Sorry.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Check-in frequency per hour.

      Input  : rows of Month | Day | Hour, one row per check-in.
      Output : one row per Month / Day / Hour with a Frequency column; hours
               that had no check-in are added with Frequency 0.

      Pipeline: sample data, then Aggregate (count per group), then per Month
      and per Day: Fill Data Gaps on Hour, repair the missing values on the
      inserted rows, then Append everything and rename the count column.

      NOTE(review): Fill Data Gaps presumably only inserts rows between the
      smallest and largest Hour present for a given day, so hours before the
      first or after the last check-in are not added. Confirm against the
      operator documentation if a full 0 to 23 range is required.
    -->
    <process version="7.0.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Sample data: the pipe-separated table from the question, built inline so the process is self-contained. -->
          <operator activated="true" class="text:create_document" compatibility="7.0.000" expanded="true" height="68" name="Create Document" width="90" x="45" y="34">
            <parameter key="text" value="Month|Day|Hour&#10;1|1|1&#10;1|1|1&#10;1|1|1&#10;1|1|4&#10;1|1|4&#10;1|1|5"/>
          </operator>
          <!-- The document is handed to Read CSV as a file object; replace these two operators with your own data source. -->
          <operator activated="true" class="text:write_document" compatibility="7.0.000" expanded="true" height="82" name="Write Document" width="90" x="45" y="187"/>
          <!-- Month and Day are read as nominal (they are looped over and filtered by value below); Hour is read as integer. -->
          <operator activated="true" class="read_csv" compatibility="7.0.000" expanded="true" height="68" name="Read CSV" width="90" x="179" y="187">
            <parameter key="column_separators" value="|"/>
            <list key="annotations"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="Month.true.nominal.attribute"/>
              <parameter key="1" value="Day.true.nominal.attribute"/>
              <parameter key="2" value="Hour.true.integer.attribute"/>
            </list>
          </operator>
          <!-- Count the rows in each Day / Month / Hour group; the result column is count(Hour). -->
          <operator activated="true" class="aggregate" compatibility="7.0.000" expanded="true" height="82" name="Aggregate" width="90" x="313" y="238">
            <list key="aggregation_attributes">
              <parameter key="Hour" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="Day|Month|Hour"/>
          </operator>
          <!-- Outer loop: one iteration per distinct Month value, exposed as the macro loop_month. -->
          <operator activated="true" class="loop_values" compatibility="7.0.000" expanded="true" height="82" name="Loop Values (Month)" width="90" x="246" y="85">
            <parameter key="attribute" value="Month"/>
            <parameter key="iteration_macro" value="loop_month"/>
            <process expanded="true">
              <!-- Keep only the rows of the current month. -->
              <operator activated="true" class="filter_examples" compatibility="7.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="45" y="34">
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="Month.equals.%{loop_month}"/>
                </list>
              </operator>
              <!-- Inner loop: one iteration per distinct Day value, exposed as the macro loop_day. -->
              <operator activated="true" class="loop_values" compatibility="7.0.000" expanded="true" height="82" name="Loop Values (Day)" width="90" x="179" y="85">
                <parameter key="attribute" value="Day"/>
                <parameter key="iteration_macro" value="loop_day"/>
                <process expanded="true">
                  <!-- Keep only the rows of the current day (within the current month). -->
                  <operator activated="true" class="filter_examples" compatibility="7.0.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="45" y="34">
                    <list key="filters_list">
                      <parameter key="filters_entry_key" value="Day.equals.%{loop_day}"/>
                    </list>
                  </operator>
                  <!-- Hour is given the id role so that Fill Data Gaps treats it as the sequence to complete. -->
                  <operator activated="true" class="set_role" compatibility="7.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="179" y="136">
                    <parameter key="attribute_name" value="Hour"/>
                    <parameter key="target_role" value="id"/>
                    <list key="set_additional_roles"/>
                  </operator>
                  <!-- Adds rows for the Hour values that are absent; all other attributes on those rows are missing. -->
                  <operator activated="true" class="fill_data_gaps" compatibility="7.0.000" expanded="true" height="82" name="Fill Data Gaps (2)" width="90" x="246" y="34"/>
                  <!-- Zero-fill missing values in numeric attributes, i.e. the count on the inserted rows (Month and Day are nominal, so they are not touched here). -->
                  <operator activated="true" class="replace_missing_values" compatibility="7.0.000" expanded="true" height="103" name="Replace Missing Values (Count)" width="90" x="313" y="187">
                    <parameter key="attribute_filter_type" value="value_type"/>
                    <parameter key="value_type" value="numeric"/>
                    <parameter key="default" value="zero"/>
                    <list key="columns"/>
                  </operator>
                  <!-- Restore Month on the inserted rows from the outer loop macro. -->
                  <operator activated="true" class="replace_missing_values" compatibility="7.0.000" expanded="true" height="103" name="Replace Missing Values (Month)" width="90" x="447" y="187">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="Month"/>
                    <parameter key="value_type" value="numeric"/>
                    <parameter key="default" value="value"/>
                    <list key="columns"/>
                    <parameter key="replenishment_value" value="%{loop_month}"/>
                  </operator>
                  <!-- Restore Day on the inserted rows from the inner loop macro. -->
                  <operator activated="true" class="replace_missing_values" compatibility="7.0.000" expanded="true" height="103" name="Replace Missing Values (Day)" width="90" x="581" y="34">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="Day"/>
                    <parameter key="value_type" value="numeric"/>
                    <parameter key="default" value="value"/>
                    <list key="columns"/>
                    <parameter key="replenishment_value" value="%{loop_day}"/>
                  </operator>
                  <!-- No target_role is given, so the operator default applies (presumably "regular"), undoing the id role set above. -->
                  <operator activated="true" class="set_role" compatibility="7.0.000" expanded="true" height="82" name="Set Role (3)" width="90" x="715" y="34">
                    <parameter key="attribute_name" value="Hour"/>
                    <list key="set_additional_roles"/>
                  </operator>
                  <connect from_port="example set" to_op="Filter Examples (2)" to_port="example set input"/>
                  <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
                  <connect from_op="Set Role (2)" from_port="example set output" to_op="Fill Data Gaps (2)" to_port="example set input"/>
                  <connect from_op="Fill Data Gaps (2)" from_port="example set output" to_op="Replace Missing Values (Count)" to_port="example set input"/>
                  <connect from_op="Replace Missing Values (Count)" from_port="example set output" to_op="Replace Missing Values (Month)" to_port="example set input"/>
                  <connect from_op="Replace Missing Values (Month)" from_port="example set output" to_op="Replace Missing Values (Day)" to_port="example set input"/>
                  <connect from_op="Replace Missing Values (Day)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
                  <connect from_op="Set Role (3)" from_port="example set output" to_port="out 1"/>
                  <portSpacing port="source_example set" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                </process>
              </operator>
              <!-- Merge the per-day results of this month into one example set. -->
              <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="82" name="Append (2)" width="90" x="296" y="85"/>
              <connect from_port="example set" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Values (Day)" to_port="example set"/>
              <connect from_op="Loop Values (Day)" from_port="out 1" to_op="Append (2)" to_port="example set 1"/>
              <connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Merge the per-month results into the final example set. -->
          <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="82" name="Append" width="90" x="447" y="85"/>
          <!-- Give the count column the name asked for in the question. -->
          <operator activated="true" class="rename" compatibility="7.0.000" expanded="true" height="82" name="Rename" width="90" x="571" y="85">
            <parameter key="old_name" value="count(Hour)"/>
            <parameter key="new_name" value="Frequency"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
          <connect from_op="Read CSV" from_port="output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Loop Values (Month)" to_port="example set"/>
          <connect from_op="Loop Values (Month)" from_port="out 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • Options
    aoneilaoneil Member Posts: 9 Contributor II
    Thank you so much, it works great!
Sign In or Register to comment.