Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

JSON processing using the JQ operator in RapidMiner

Somnath_Das Member Posts: 31 Maven
I have simple JSON data in the format as follows:

{"A":"a1","B":"b1"}
{"A":"a2","B":"b2"}
{"A":"a3","B":"b3"}

I want the output in the format:
A   B
a1  b1
a2  b2
a3  b3

Please help, it is urgent
Tagged:

Answers

  • BalazsBarany Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert Posts: 955 Unicorn
    Hi,

    when I try your input on jqplay.org with the filter [.A, .B] | @csv, it returns the correct output.

    The library used in the RapidMiner extension only returns the first element though. I'm not sure what to do there.

    Your input is a bit atypical because it's a list of objects, but not in a syntactically explicit way. 

    When changing the input to this:
    [
      {"A":"a1","B":"b1"},
      {"A":"a2","B":"b2"},
      {"A":"a3","B":"b3"}
    ]
    it works with the filter .[] | [.A, .B].

    Example process:

    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.001">
      <!--
        Example process: converts a JSON array of {A, B} objects into a table
        along two parallel branches fed from the same document:
          1. Create Document -> Multiply -> JSON To Data -> result 1
          2. Create Document -> Multiply -> Process Document with jq
             -> Write Document -> Read CSV -> result 2
        The wiring is defined by the connect elements near the end of the
        inner process element.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.8.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="-1"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Source document. The "text" value is the XML-escaped form of a JSON
            array holding three objects with keys A and B (&#10; is a line
            feed, &quot; is a double quote).
          -->
          <operator activated="true" class="text:create_document" compatibility="9.3.001" expanded="true" height="68" name="Create Document" width="90" x="112" y="34">
            <parameter key="text" value="[&#10;  {&quot;A&quot;:&quot;a1&quot;,&quot;B&quot;:&quot;b1&quot;},&#10;  {&quot;A&quot;:&quot;a2&quot;,&quot;B&quot;:&quot;b2&quot;},&#10;  {&quot;A&quot;:&quot;a3&quot;,&quot;B&quot;:&quot;b3&quot;}&#10;]"/>
            <parameter key="add label" value="false"/>
            <parameter key="label_type" value="nominal"/>
          </operator>
          <!-- Fan-out point: "output 1" goes to JSON To Data, "output 2" to the jq operator. -->
          <operator activated="true" class="multiply" compatibility="9.8.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
          <!-- Branch 1: direct JSON-to-table conversion; its example set is delivered to "result 1". -->
          <operator activated="true" class="text:json_to_data" compatibility="9.3.001" expanded="true" height="82" name="JSON To Data" width="90" x="380" y="34">
            <parameter key="ignore_arrays" value="false"/>
            <parameter key="limit_attributes" value="false"/>
            <parameter key="skip_invalid_documents" value="false"/>
            <parameter key="guess_data_types" value="true"/>
            <parameter key="keep_missing_attributes" value="false"/>
            <parameter key="missing_values_aliases" value=", null, NaN, missing"/>
          </operator>
          <!--
            Branch 2, step 1: applies the jq filter ".[] | [.A, .B]" with
            output_format "csv".
            NOTE(review): presumably this emits one CSV row per array element
            (a1,b1 / a2,b2 / a3,b3); confirm against the jq extension.
          -->
          <operator activated="true" class="json_processing_with_jq:process_document_with_jq" compatibility="0.8.001" expanded="true" height="68" name="Process Document with jq" width="90" x="380" y="136">
            <parameter key="indent_json" value="true"/>
            <parameter key="jq_expression" value=".[] | [.A, .B]"/>
            <parameter key="output_format" value="csv"/>
          </operator>
          <!--
            Branch 2, step 2: no target file parameter is set here; the
            operator's "file" port is connected straight to Read CSV.
          -->
          <operator activated="true" class="text:write_document" compatibility="9.3.001" expanded="true" height="82" name="Write Document" width="90" x="514" y="136">
            <parameter key="overwrite" value="true"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <!--
            Branch 2, step 3: parses the CSV text. first_row_as_names is false,
            so the column names A and B come from the
            data_set_meta_data_information list below, both typed as nominal.
          -->
          <operator activated="true" class="read_csv" compatibility="9.8.001" expanded="true" height="68" name="Read CSV" width="90" x="648" y="136">
            <parameter key="column_separators" value=","/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="use_quotes" value="true"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="skip_comments" value="false"/>
            <parameter key="comment_characters" value="#"/>
            <parameter key="starting_row" value="1"/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="infinity_representation" value=""/>
            <parameter key="date_format" value=""/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <!-- Key is the column index. NOTE(review): value looks like name.selected.type.role; confirm. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="A.true.nominal.attribute"/>
              <parameter key="1" value="B.true.nominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="true"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <!-- Wiring: a connect element without to_op targets a result port of the enclosing process. -->
          <connect from_op="Create Document" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="JSON To Data" to_port="documents 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Process Document with jq" to_port="input 1"/>
          <connect from_op="JSON To Data" from_port="example set" to_port="result 1"/>
          <connect from_op="Process Document with jq" from_port="result 1" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
          <connect from_op="Read CSV" from_port="output" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>


    With this input, the output of "JSON To Data" is also meaningful.


    Regards,

    Balázs

Sign In or Register to comment.