🎉 🎉   RAPIDMINER 9.5 BETA IS OUT!!!   🎉 🎉

GRAB THE HOTTEST NEW BETA OF RAPIDMINER STUDIO, SERVER, AND RADOOP. LET US KNOW WHAT YOU THINK!

CLICK HERE TO DOWNLOAD

🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

Get file name from csv/excel operator dynamically, split file name and pass it write db operator

RadhakrishnaRadhakrishna Member Posts: 3 Newbie
edited October 7 in Help
Hi,
         i am very new to RM. I have to read file name from csv/excel  file.
Split the file name with _ delimiter and pass them to  write database operator to store them in database.
Could you please suggest me how we can implement this solution?
Thanks,
Radhakrishna

Answers

  • kaymankayman Member Posts: 396   Unicorn
    Are your excel files stored in a folder and do you loop through them?
    In that case the loop files operator is the easiest way to start. Find attached a simplified example, it will require further improvements but it might get you started.


    <?xml version="1.0" encoding="UTF-8"?><process version="9.4.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="UTF-8"/>
        <process expanded="true">
          <operator activated="true" class="concurrency:loop_files" compatibility="9.4.001" expanded="true" height="82" name="Loop Files" width="90" x="179" y="34">
            <parameter key="directory" value="your_excel_folder"/>
            <parameter key="filter_type" value="glob"/>
            <parameter key="recursive" value="false"/>
            <parameter key="enable_macros" value="true"/>
            <parameter key="macro_for_file_name" value="file_name"/>
            <parameter key="macro_for_file_type" value="file_type"/>
            <parameter key="macro_for_folder_name" value="folder_name"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="false"/>
            <process expanded="true">
              <operator activated="true" class="utility:create_exampleset" compatibility="9.4.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="447" y="85">
                <parameter key="generator_type" value="attribute functions"/>
                <parameter key="number_of_examples" value="1"/>
                <parameter key="use_stepsize" value="false"/>
                <list key="function_descriptions">
                  <parameter key="my_excel_file_name" value="%{file_name}"/>
                </list>
                <parameter key="add_id_attribute" value="false"/>
                <list key="numeric_series_configuration"/>
                <list key="date_series_configuration"/>
                <list key="date_series_configuration (interval)"/>
                <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="column_separator" value=","/>
                <parameter key="parse_all_as_nominal" value="false"/>
                <parameter key="decimal_point_character" value="."/>
                <parameter key="trim_attribute_names" value="true"/>
                <description align="center" color="transparent" colored="false" width="126">create a single record with the excel name</description>
              </operator>
              <operator activated="true" class="replace" compatibility="9.4.001" expanded="true" height="82" name="Replace" width="90" x="581" y="85">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="my_excel_file_name"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value=".xlsx"/>
                <description align="center" color="transparent" colored="false" width="126">remove the extension</description>
              </operator>
              <operator activated="true" class="split" compatibility="9.4.001" expanded="true" height="82" name="Split" width="90" x="715" y="85">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="my_excel_file_name"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="split_pattern" value="_"/>
                <parameter key="split_mode" value="ordered_split"/>
                <description align="center" color="transparent" colored="false" width="126">split by _</description>
              </operator>
              <connect from_op="Create ExampleSet" from_port="output" to_op="Replace" to_port="example set input"/>
              <connect from_op="Replace" from_port="example set output" to_op="Split" to_port="example set input"/>
              <connect from_op="Split" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_file object" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">loop through all excel files and get the name split</description>
          </operator>
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.2.000" expanded="true" height="82" name="Append (Superset)" width="90" x="313" y="34">
            <description align="center" color="transparent" colored="false" width="126">add all together</description>
          </operator>
          <connect from_op="Loop Files" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>
    


    Tghadially
Sign In or Register to comment.