Turning JSON Data into Spreadsheed

Anna_May1Anna_May1 Member Posts: 14 Learner I
Hi there RapidMiner Community, 

I'm currently trying to load JSON data into RapidMiner to use it for a sentiment analysis I'm working on. 
Browsing threw a lot of the forum content on here regarding the work with JSON data and trying out a lot of the solutions, I sadly haven't found any that worked for me.

The data should look something like the xlsx file I attached here, after loading the data into RapidMiner. I have done the conversion of this JSON file online but due to the large amount of data I have doing that entirely online isn't really an option.
Attached you can find the JSON data, as well as what it should turn into (xlsx file attached). 

Is there any way to turn the JSON file into an Excel file/Spreadsheed in general, like the one I attached?
If so, is there a way to convert multiple JSON files into one spreadsheet (or solution) at once?

Looking forward to hearing from you guys!

Cheers, 

Anna 

Best Answer

  • MarcoBarradasMarcoBarradas Administrator, Employee, RapidMiner Certified Analyst, Member Posts: 231 Unicorn
    edited November 2020 Solution Accepted
    @Anna_May1
    See if this helps you.
    I'm leaving the last part of the answers for you to process. Yo need to cut the JSON an then follow the same logic I applied to extract the previous information.
    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.8.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="text:read_document" compatibility="9.3.001" expanded="true" height="68" name="JSON" width="90" x="112" y="34">
            <parameter key="file" value="C:/Users/mbarradas/Downloads/LucyandYak_2020-09-19_07-52-37_UTC_comments.json"/>
            <parameter key="extract_text_only" value="true"/>
            <parameter key="use_file_extension_as_type" value="true"/>
            <parameter key="content_type" value="txt"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="9.8.000" expanded="true" height="103" name="Multiply" width="90" x="179" y="187"/>
          <operator activated="true" class="text:process_documents" compatibility="9.3.001" expanded="true" height="103" name="Answers" width="90" x="313" y="187">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="true"/>
            <parameter key="keep_text" value="false"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <process expanded="true">
              <operator activated="true" class="text:extract_information" compatibility="9.3.001" expanded="true" height="68" name="Extract Information (3)" width="90" x="112" y="34">
                <parameter key="query_type" value="JsonPath"/>
                <list key="string_machting_queries"/>
                <parameter key="attribute_type" value="Nominal"/>
                <list key="regular_expression_queries"/>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <parameter key="ignore_CDATA" value="true"/>
                <parameter key="assume_html" value="true"/>
                <list key="index_queries"/>
                <list key="jsonpath_queries">
                  <parameter key="Id_general" value=".id"/>
                </list>
              </operator>
              <operator activated="true" class="text:cut_document" compatibility="9.3.001" expanded="true" height="68" name="Cut Document (2)" width="90" x="313" y="34">
                <parameter key="query_type" value="JsonPath"/>
                <list key="string_machting_queries"/>
                <parameter key="attribute_type" value="Nominal"/>
                <list key="regular_expression_queries"/>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <parameter key="ignore_CDATA" value="true"/>
                <parameter key="assume_html" value="true"/>
                <list key="index_queries"/>
                <list key="jsonpath_queries">
                  <parameter key="Fila" value=".answers[*]"/>
                </list>
                <process expanded="true">
                  <operator activated="true" class="text:extract_information" compatibility="9.3.001" expanded="true" height="68" name="Extract Information (2)" width="90" x="313" y="34">
                    <parameter key="query_type" value="JsonPath"/>
                    <list key="string_machting_queries"/>
                    <parameter key="attribute_type" value="Nominal"/>
                    <list key="regular_expression_queries"/>
                    <list key="regular_region_queries"/>
                    <list key="xpath_queries"/>
                    <list key="namespaces"/>
                    <parameter key="ignore_CDATA" value="true"/>
                    <parameter key="assume_html" value="true"/>
                    <list key="index_queries"/>
                    <list key="jsonpath_queries">
                      <parameter key="ID_answer" value=".id"/>
                      <parameter key="created_at_answer" value=".created_at"/>
                      <parameter key="text_answer" value=".text"/>
                      <parameter key="owner.id_answer" value=".owner.id"/>
                      <parameter key="is_verified_answer" value=".is_verified"/>
                      <parameter key="profile_pic_url_answer" value=".profile_pic_url"/>
                      <parameter key="username_answer" value=".username"/>
                      <parameter key="likes_count_answer" value=".likes_count"/>
                    </list>
                  </operator>
                  <operator activated="false" class="de_pivot" compatibility="9.8.000" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="136">
                    <list key="attribute_name">
                      <parameter key="id" value="\[\d+]\.id"/>
                      <parameter key="created_at" value="\[\d+]\.created_at"/>
                      <parameter key="text" value="\[\d+]\.text"/>
                      <parameter key="owner_id" value="\[\d+]\.owner.id"/>
                      <parameter key="is_verified" value="\[\d+]\.is_verified"/>
                      <parameter key="profile_pic_url" value="\[\d+]\.profile_pic_url"/>
                      <parameter key="username" value="\[\d+]\.username"/>
                      <parameter key="likes_count" value="\[\d+]\.likes_count"/>
                    </list>
                    <parameter key="index_attribute" value="ID"/>
                    <parameter key="create_nominal_index" value="false"/>
                    <parameter key="keep_missings" value="true"/>
                  </operator>
                  <connect from_port="segment" to_op="Extract Information (2)" to_port="document"/>
                  <connect from_op="Extract Information (2)" from_port="document" to_port="document 1"/>
                  <portSpacing port="source_segment" spacing="0"/>
                  <portSpacing port="sink_document 1" spacing="0"/>
                  <portSpacing port="sink_document 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="document" to_op="Extract Information (3)" to_port="document"/>
              <connect from_op="Extract Information (3)" from_port="document" to_op="Cut Document (2)" to_port="document"/>
              <connect from_op="Cut Document (2)" from_port="documents" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="text:process_documents" compatibility="9.3.001" expanded="true" height="103" name="Processing" width="90" x="246" y="34">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="true"/>
            <parameter key="keep_text" value="false"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <process expanded="true">
              <operator activated="true" class="text:cut_document" compatibility="9.3.001" expanded="true" height="68" name="Cut Document" width="90" x="313" y="34">
                <parameter key="query_type" value="JsonPath"/>
                <list key="string_machting_queries"/>
                <parameter key="attribute_type" value="Nominal"/>
                <list key="regular_expression_queries"/>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <parameter key="ignore_CDATA" value="true"/>
                <parameter key="assume_html" value="true"/>
                <list key="index_queries"/>
                <list key="jsonpath_queries">
                  <parameter key="Fila" value=".[*]"/>
                </list>
                <process expanded="true">
                  <operator activated="true" class="text:extract_information" compatibility="9.3.001" expanded="true" height="68" name="Extract Information" width="90" x="313" y="34">
                    <parameter key="query_type" value="JsonPath"/>
                    <list key="string_machting_queries"/>
                    <parameter key="attribute_type" value="Nominal"/>
                    <list key="regular_expression_queries"/>
                    <list key="regular_region_queries"/>
                    <list key="xpath_queries"/>
                    <list key="namespaces"/>
                    <parameter key="ignore_CDATA" value="true"/>
                    <parameter key="assume_html" value="true"/>
                    <list key="index_queries"/>
                    <list key="jsonpath_queries">
                      <parameter key="Id_general" value=".id"/>
                      <parameter key="created_at" value=".created_at"/>
                      <parameter key="text" value=".text"/>
                      <parameter key="owner.id" value=".owner.id"/>
                      <parameter key="is_verified" value=".is_verified"/>
                      <parameter key="profile_pic_url" value=".profile_pic_url"/>
                      <parameter key="username" value=".username"/>
                      <parameter key="likes_count" value=".likes_count"/>
                      <parameter key="answers" value=".answers"/>
                    </list>
                  </operator>
                  <operator activated="false" class="de_pivot" compatibility="9.8.000" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="136">
                    <list key="attribute_name">
                      <parameter key="id" value="\[\d+]\.id"/>
                      <parameter key="created_at" value="\[\d+]\.created_at"/>
                      <parameter key="text" value="\[\d+]\.text"/>
                      <parameter key="owner_id" value="\[\d+]\.owner.id"/>
                      <parameter key="is_verified" value="\[\d+]\.is_verified"/>
                      <parameter key="profile_pic_url" value="\[\d+]\.profile_pic_url"/>
                      <parameter key="username" value="\[\d+]\.username"/>
                      <parameter key="likes_count" value="\[\d+]\.likes_count"/>
                    </list>
                    <parameter key="index_attribute" value="ID"/>
                    <parameter key="create_nominal_index" value="false"/>
                    <parameter key="keep_missings" value="true"/>
                  </operator>
                  <connect from_port="segment" to_op="Extract Information" to_port="document"/>
                  <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
                  <portSpacing port="source_segment" spacing="0"/>
                  <portSpacing port="sink_document 1" spacing="0"/>
                  <portSpacing port="sink_document 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="document" to_op="Cut Document" to_port="document"/>
              <connect from_op="Cut Document" from_port="documents" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="concurrency:join" compatibility="9.8.000" expanded="true" height="82" name="Join" width="90" x="514" y="85">
            <parameter key="remove_double_attributes" value="true"/>
            <parameter key="join_type" value="left"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="Id_general" value="Id_general"/>
            </list>
            <parameter key="keep_both_join_attributes" value="false"/>
          </operator>
          <connect from_op="JSON" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Processing" to_port="documents 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Answers" to_port="documents 1"/>
          <connect from_op="Answers" from_port="example set" to_op="Join" to_port="right"/>
          <connect from_op="Processing" from_port="example set" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    
    


    Anna_May1
Sign In or Register to comment.