Developing SVD from TF-IDF

MarlaBotMarlaBot Administrator, Moderator, Employee, Member Posts: 57 Community Manager
edited March 2019 in Help
A RapidMiner user wants to know the answer to this question: "Hey there. I'm trying to develop an svd from tf-idf, but when I go to the chart, I can't see the labels of the variables, only the numbers from svd. Does anybody know how to get to the labels? Thank you."

XML Process:

<?xml version="1.0" encoding="UTF-8"?><process version="9.2.000">
  <!-- Process posted with the question: Read Excel, then Generate TFIDF, then Nominal to Numerical, then SVD. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Reads sheet number 3 of the Excel file; the first row is not used as attribute names. -->
      <operator activated="true" class="read_excel" compatibility="9.2.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\asus\hubiC\Máster\Máster GENI\18-19\Actividades\TF-IDF y SVD\MATRIZ T-D_Elena Rubio.xls"/>
        <parameter key="sheet_selection" value="sheet number"/>
        <parameter key="sheet_number" value="3"/>
        <parameter key="imported_cell_range" value="A1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <!-- Column A is declared as a polynominal regular attribute; columns B to H as integer attributes. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.polynominal.attribute"/>
          <parameter key="1" value="B.true.integer.attribute"/>
          <parameter key="2" value="C.true.integer.attribute"/>
          <parameter key="3" value="D.true.integer.attribute"/>
          <parameter key="4" value="E.true.integer.attribute"/>
          <parameter key="5" value="F.true.integer.attribute"/>
          <parameter key="6" value="G.true.integer.attribute"/>
          <parameter key="7" value="H.true.integer.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
      </operator>
      <operator activated="true" class="generate_tfidf" compatibility="9.2.000" expanded="true" height="82" name="Generate TFIDF" width="90" x="179" y="34">
        <parameter key="calculate_term_frequencies" value="true"/>
      </operator>
      <!-- Dummy-codes the nominal attributes (attribute filter "all", special attributes not included). -->
      <operator activated="true" class="nominal_to_numerical" compatibility="9.2.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="313" y="85">
        <parameter key="return_preprocessing_model" value="false"/>
        <parameter key="create_view" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="coding_type" value="dummy coding"/>
        <parameter key="use_comparison_groups" value="false"/>
        <list key="comparison_groups"/>
        <parameter key="unexpected_value_handling" value="all 0 and warning"/>
        <parameter key="use_underscore_in_name" value="false"/>
      </operator>
      <!-- SVD configured for a fixed number of dimensions (2). -->
      <operator activated="true" class="singular_value_decomposition" compatibility="9.2.000" expanded="true" height="103" name="SVD" width="90" x="447" y="34">
        <parameter key="dimensionality_reduction" value="fixed number"/>
        <parameter key="percentage_threshold" value="0.95"/>
        <parameter key="dimensions" value="2"/>
      </operator>
      <connect from_port="input 1" to_op="Read Excel" to_port="file"/>
      <connect from_op="Read Excel" from_port="output" to_op="Generate TFIDF" to_port="example set input"/>
      <connect from_op="Generate TFIDF" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
      <connect from_op="Nominal to Numerical" from_port="example set output" to_op="SVD" to_port="example set input"/>
      <connect from_op="SVD" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • yyhuangyyhuang Administrator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    Hi,

    The input data is not available. But I guess you want to check out the "pre" output node from SVD?


    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.000">
      <!-- Example process: inline sample data is passed through Generate TFIDF (2), Nominal to Numerical and SVD;
           both the SVD example set and its preprocessing model are delivered as results. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Inline sample data supplied as comma separated text: a "Words" column plus the numeric columns
               Doc1, Doc2 and Doc3 (four data rows). Stands in for the Excel input, which was not available. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.2.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="112" y="34">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <!-- Rows are separated by &#10; (line feed); each value after a comma is preceded by &#9; (tab). -->
            <parameter key="input_csv_text" value="Words,Doc1,Doc2,Doc3&#10;car,&#9;27.0,&#9;4.0,&#9;24.0&#10;insurance,&#9;0.0,&#9;33.0,&#9;29.0&#10;negative,&#9;3.0,&#9;33.0,&#9;0.0&#10;positive,&#9;14.0,&#9;0.0,&#9;17.0"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <operator activated="true" class="generate_tfidf" compatibility="9.2.000" expanded="true" height="82" name="Generate TFIDF (2)" width="90" x="246" y="34">
            <parameter key="calculate_term_frequencies" value="true"/>
          </operator>
          <!-- Nominal to Numerical with attribute filter "all" and coding type "dummy coding";
               special attributes are not included (include_special_attributes is false). -->
          <operator activated="true" class="nominal_to_numerical" compatibility="9.2.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="380" y="34">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="coding_type" value="dummy coding"/>
            <parameter key="use_comparison_groups" value="false"/>
            <list key="comparison_groups"/>
            <parameter key="unexpected_value_handling" value="all 0 and warning"/>
            <parameter key="use_underscore_in_name" value="false"/>
          </operator>
          <!-- SVD set to a fixed number of dimensions (2). NOTE(review): percentage_threshold is also present but is
               presumably ignored in "fixed number" mode; confirm against the operator documentation. -->
          <operator activated="true" class="singular_value_decomposition" compatibility="9.2.000" expanded="true" height="103" name="SVD" width="90" x="514" y="34">
            <parameter key="dimensionality_reduction" value="fixed number"/>
            <parameter key="percentage_threshold" value="0.95"/>
            <parameter key="dimensions" value="2"/>
          </operator>
          <!-- Wiring: Create ExampleSet, then Generate TFIDF (2), then Nominal to Numerical, then SVD. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Generate TFIDF (2)" to_port="example set input"/>
          <connect from_op="Generate TFIDF (2)" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
          <connect from_op="Nominal to Numerical" from_port="example set output" to_op="SVD" to_port="example set input"/>
          <!-- The transformed example set goes to result 1; the SVD preprocessing model goes to result 2. -->
          <connect from_op="SVD" from_port="example set output" to_port="result 1"/>
          <connect from_op="SVD" from_port="preprocessing model" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    


  • rmtorresrmtorres Member Posts: 4 Contributor I
    Thank you for your comment. Unfortunately, this is not the problem. What I am trying to do is to visualize the SVD result in a scatter plot, but with the labels of the attributes, not with the coordinates.
  • rmtorresrmtorres Member Posts: 4 Contributor I
    Finally, I have found a solution! When reading the data from Excel, go to "data set meta data information" in the Parameters panel, open "Edit List", and in the first row set the attribute's type to "text" and its role to "label". I have highlighted this change in bold in the process XML.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.000">
      <!-- Solution process: Read Excel imports the first column as a text label, and Generate TFIDF feeds SVD
           directly (this version contains no Nominal to Numerical operator). -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Reads sheet number 2 of the Excel file, taking attribute names from the first row. -->
          <operator activated="true" class="read_excel" compatibility="9.2.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
            <parameter key="excel_file" value="C:\Users\asus\hubiC\Máster\Máster GENI\18-19\Actividades\TF-IDF y SVD\MATRIZ T-D_Elena Rubio.xls"/>
            <parameter key="sheet_selection" value="sheet number"/>
            <parameter key="sheet_number" value="2"/>
            <parameter key="imported_cell_range" value="A1"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="Spanish"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <!-- Key change described in the post: entry 0 (column A) is given value type "text" and role "label".
                 Entries 1 to 7 are integer regular attributes. NOTE(review): the "\." in their names presumably
                 escapes a literal dot in column names such as "1.0"; confirm against the Read Excel documentation. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="A.true.text.label"/>
              <parameter key="1" value="1\.0.true.integer.attribute"/>
              <parameter key="2" value="2\.0.true.integer.attribute"/>
              <parameter key="3" value="3\.0.true.integer.attribute"/>
              <parameter key="4" value="4\.0.true.integer.attribute"/>
              <parameter key="5" value="5\.0.true.integer.attribute"/>
              <parameter key="6" value="6\.0.true.integer.attribute"/>
              <parameter key="7" value="7\.0.true.integer.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <operator activated="true" class="generate_tfidf" compatibility="9.2.000" expanded="true" height="82" name="Generate TFIDF" width="90" x="179" y="34">
            <parameter key="calculate_term_frequencies" value="true"/>
          </operator>
          <!-- SVD set to a fixed number of dimensions (2). -->
          <operator activated="true" class="singular_value_decomposition" compatibility="9.2.000" expanded="true" height="103" name="SVD" width="90" x="447" y="34">
            <parameter key="dimensionality_reduction" value="fixed number"/>
            <parameter key="percentage_threshold" value="0.95"/>
            <parameter key="dimensions" value="2"/>
          </operator>
          <!-- Wiring: process input 1 to Read Excel (file port), then Generate TFIDF, then SVD, then result 1. -->
          <connect from_port="input 1" to_op="Read Excel" to_port="file"/>
          <connect from_op="Read Excel" from_port="output" to_op="Generate TFIDF" to_port="example set input"/>
          <connect from_op="Generate TFIDF" from_port="example set output" to_op="SVD" to_port="example set input"/>
          <connect from_op="SVD" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>


Sign In or Register to comment.