Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
weight of attributes -
Dear RapidMiner Community!
I am a newbie here, the same as in data science. I am doing my first analysis project for the college assignment.
I tried to find the answer here in the forum and followed the suggestions, but still I am stuck.
The data set I am working on has got 35 attributes, the target one is binominal (yes/no).
Before I choose the most relevant attributes for further exploration and examining correlation, I want to see what percentage of positive values ('Yes') there is in every attribute.
I will appreciate any help for a beginner student.
Cheers,Gosia
I am a newbie here, the same as in data science. I am doing my first analysis project for the college assignment.
I tried to find the answer here in the forum and followed the suggestions, but still I am stuck.
The data set I am working on has got 35 attributes, the target one is binominal (yes/no).
Before I choose the most relevant attributes for further exploration and examining correlation, I want to see what percentage of positive values ('Yes') there is in every attribute.
I will appreciate any help for a beginner student.
Cheers,Gosia
Tagged:
0
Best Answers
-
MartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,529 RM Data Scientist Hi @GosiaRze, you can do an aggregate with a default aggregation of sum and group by your assignment attribute. Best, Martin - Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany6 -
GosiaRze Member Posts: 3 Learner I @mschmitz - Thank you! I tried the Aggregate operator yesterday, but I am still making some mistake.
If I understood correctly:
"Aggregate-> Default Aggregation -> Sum -> Group by attributes -> (my attribute) "
What I get is the sum of the data in different columns, e.g. for the column "Age" I got the sum of age values for "Yes" and "No", respectively to my target attribute. That is not what I am looking for.
I changed "Default Aggregation -> Sum" to "Default Aggregation -> Count (percentage)", but the results for every column are the same - every column shows the % of Yes and No from my target attribute.
What I am trying to get is: how much % of Yes from my target attribute is linked to every column?
In other words, what % of examples in every column is defined by Yes and No from the target column?
What is the mistake that I make?
0 -
Telcontar120 RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn You can also run a Naive Bayes classifier and then output the model, which shows the distribution table which will have the % of Yes and No for each value of each attribute.
6
Answers
<!-- Root element of the process definition. The opening tag was missing from
     the pasted fragment although its closing tag is present at the end of the
     listing; without it the XML is not well-formed.
     NOTE(review): the version value is reconstructed from the
     compatibility="9.7.000" attributes used by the operators; confirm. -->
<process version="9.7.000">
<!-- Process context: no input/output bindings and no macros are defined. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!-- Top-level "Process" operator: carries the global process parameters and
     the operator chain with its wiring. -->
<operator activated="true"
          class="process"
          compatibility="9.7.000"
          expanded="true"
          name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">

    <!-- Step 1: load the repository entry //Samples/data/Golf. -->
    <operator activated="true"
              class="retrieve"
              compatibility="9.7.000"
              expanded="true"
              height="68"
              name="Retrieve Golf"
              width="90"
              x="179"
              y="136">
      <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>

    <!-- Step 2: attribute filter configured on value_type = nominal
         (the operator's own description reads "only nominal"). -->
    <operator activated="true"
              class="select_attributes"
              compatibility="9.7.000"
              expanded="true"
              height="82"
              name="Select Attributes"
              width="90"
              x="313"
              y="136">
      <parameter key="attribute_filter_type" value="value_type"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="nominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <description align="center" color="transparent" colored="false" width="126">only nominal</description>
    </operator>

    <!-- Step 3: loop over the attributes (filter type "all"). The name of the
         current attribute is published through the macro "loop_attribute";
         parallel execution is enabled. -->
    <operator activated="true"
              class="concurrency:loop_attributes"
              compatibility="9.7.000"
              expanded="true"
              height="82"
              name="Loop Attributes"
              width="90"
              x="447"
              y="136">
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="attribute_name_macro" value="loop_attribute"/>
      <parameter key="reuse_results" value="false"/>
      <parameter key="enable_parallel_execution" value="true"/>
      <process expanded="true">

        <!-- Per iteration: pivot grouped by "Play", with the looped attribute
             as the column grouping attribute and "count" as its aggregation. -->
        <operator activated="true"
                  class="blending:pivot"
                  compatibility="9.7.000"
                  expanded="true"
                  height="82"
                  name="Pivot (2)"
                  width="90"
                  x="179"
                  y="34">
          <parameter key="group_by_attributes" value="Play"/>
          <parameter key="column_grouping_attribute" value="%{loop_attribute}"/>
          <list key="aggregation_attributes">
            <parameter key="%{loop_attribute}" value="count"/>
          </list>
          <parameter key="use_default_aggregation" value="false"/>
          <parameter key="default_aggregation_function" value="first"/>
        </operator>

        <!-- Adds a column named "attribute" defined by the expression
             %{loop_attribute}; all existing columns are kept (keep_all).
             NOTE(review): presumably this labels each pivot result with the
             name of the attribute it was built from; confirm. -->
        <operator activated="true"
                  class="generate_attributes"
                  compatibility="9.7.000"
                  expanded="true"
                  height="82"
                  name="Generate Attributes"
                  width="90"
                  x="514"
                  y="34">
          <list key="function_descriptions">
            <parameter key="attribute" value="%{loop_attribute}"/>
          </list>
          <parameter key="keep_all" value="true"/>
        </operator>

        <!-- Inner wiring: loop input -> Pivot (2) -> Generate Attributes -> loop output. -->
        <connect from_port="input 1" to_op="Pivot (2)" to_port="input"/>
        <connect from_op="Pivot (2)" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
        <connect from_op="Generate Attributes" from_port="example set output" to_port="output 1"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="source_input 2" spacing="0"/>
        <portSpacing port="sink_output 1" spacing="0"/>
        <portSpacing port="sink_output 2" spacing="0"/>
      </process>
    </operator>

    <!-- The two operators below are deactivated (activated="false") and are
         not referenced by any connect element of this process. -->
    <operator activated="false"
              class="operator_toolbox:advanced_append"
              compatibility="2.7.000-SNAPSHOT"
              expanded="true"
              height="68"
              name="Append (Superset)"
              width="90"
              x="581"
              y="238"/>
    <operator activated="false"
              class="aggregate"
              compatibility="9.7.000"
              expanded="true"
              height="82"
              name="Aggregate"
              width="90"
              x="447"
              y="289">
      <parameter key="use_default_aggregation" value="false"/>
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="default_aggregation_function" value="count"/>
      <list key="aggregation_attributes">
        <parameter key="Outlook" value="count"/>
      </list>
      <parameter key="group_by_attributes" value="Play|Outlook"/>
      <parameter key="count_all_combinations" value="false"/>
      <parameter key="only_distinct" value="false"/>
      <parameter key="ignore_missings" value="true"/>
    </operator>

    <!-- Outer wiring: Retrieve Golf -> Select Attributes -> Loop Attributes -> result 1. -->
    <connect from_op="Retrieve Golf" from_port="output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Loop Attributes" to_port="input 1"/>
    <connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
Dortmund, Germany
Dortmund, Germany