Instagram search enrich data with web service

Robi_MeRobi_Me Member Posts: 32 Maven
edited January 2021 in Help
Hi

I had this process working yesterday, but for some reason I can't get it to work today. I am attempting to extract comments from Instagram for sentiment analysis, and I need to search at the account level as well as the keyword level.

I am not great at JSON paths; have I written these correctly? When I paste the URLs into a web browser, I do get a JSON response.

<?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Seed table for the account-profile lookups: comma separated text with the columns `term` and `search`,
           where `search` holds an Instagram profile URL ending in ?__a=1. -->
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="187">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- &#10; is the XML character reference for a line feed, so the value below is one header line plus three data rows.
             NOTE(review): the header and the first data row have a space after the comma, the other rows do not;
             trim_attribute_names is true, but whether data values are trimmed is not visible here. TODO confirm. -->
        <parameter key="input_csv_text" value="term, search&#10;saradioawards, https://www.instagram.com/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/pfizerinc/?__a=1&#10;washington_post,https://www.instagram.com/coveringpotus/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!-- Loops over the incoming example set; iteration_macro names the loop macro `example`, referenced below as %{example}.
           The nested process is: Extract Macro, then Enrich Data by Webservice, then Delay. -->
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="187">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <!-- Fills macro `search` from attribute `search` at example index %{example};
               the additional_macros list also fills macro `term` from attribute `term`. -->
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <!-- Issues a GET request to the URL held in %{search}; query_type is JsonPath, so the jsonpath_queries list
               (key = result name, value = JsonPath into the response) is the one that matters here. -->
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="313" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <!-- NOTE(review): leftover `foo` entry; presumably ignored while query_type is JsonPath. TODO confirm. -->
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <!-- Every path is rooted at $.graphql.user, i.e. the profile object of the JSON response. -->
            <list key="jsonpath_queries">
              <parameter key="bio" value="$.graphql.user.biography"/>
              <parameter key="full_name" value="$.graphql.user.full_name"/>
              <parameter key="user_id" value="$.graphql.user.id"/>
              <parameter key="is_business_account" value="$.graphql.user.is_business_account"/>
              <parameter key="category_name" value="$.graphql.user.business_category_name"/>
              <parameter key="category_enum" value="$.graphql.user.category_enum"/>
              <parameter key="category_user" value="$.graphql.user.category_name"/>
              <parameter key="connected_fb_page" value="$.graphql.user.connected_fb_page"/>
              <parameter key="edges" value="$.graphql.user.edge_owner_to_timeline_media.edges"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <!-- Fixed pause of 6000 (delay_amount) after each request; the min/max values belong to the random mode, which is not selected. -->
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay" width="90" x="447" y="34">
            <parameter key="delay" value="fixed"/>
            <parameter key="delay_amount" value="6000"/>
            <parameter key="min_delay_amount" value="0"/>
            <parameter key="max_delay_amount" value="1000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <!-- Wiring: loop input to Extract Macro to Enrich Data by Webservice to Delay to loop output 1. -->
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Enrich Data by Webservice" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_op="Delay" to_port="through 1"/>
          <connect from_op="Delay" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Seed table for the hashtag lookups: comma separated text with the columns `term` and `search`,
           where `search` holds an Instagram explore/tags URL ending in ?__a=1.
           This operator is deactivated (activated="false") in this version of the process. -->
      <operator activated="false" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="391">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- Rows are separated by the line-feed character reference &#10;. It must not be written with an escaped
             ampersand, otherwise the literal text ends up inside the URL and all data rows collapse into one.
             The hashtag in the last URL is written without a space, since a raw space is not valid inside a URL. -->
        <parameter key="input_csv_text" value="term, search&#10;saradioawards, https://www.instagram.com/explore/tags/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/explore/tags/pfizer/?__a=1&#10;washington_post,https://www.instagram.com/explore/tags/washingtonpost/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!-- Hashtag variant of the lookup loop; deactivated (activated="false") in this version of the process.
           iteration_macro names the loop macro `example`, referenced below as %{example}. -->
      <operator activated="false" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples (2)" width="90" x="313" y="391">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <!-- Fills macro `search` from attribute `search` at example index %{example};
               the additional_macros list also fills macro `term` from attribute `term`. -->
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <!-- Issues a GET request to the URL held in %{search}; query_type is JsonPath, so the jsonpath_queries list
               (key = result name, value = JsonPath into the response) is the one that matters here. -->
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice (2)" width="90" x="313" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <!-- NOTE(review): leftover `foo` entry; presumably ignored while query_type is JsonPath. TODO confirm. -->
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <!-- Every path is rooted at $.graphql.hashtag: its id, its name and the count under edge_hashtag_to_media. -->
            <list key="jsonpath_queries">
              <parameter key="search_id" value="$.graphql.hashtag.id"/>
              <parameter key="search" value="$.graphql.hashtag.name"/>
              <parameter key="posts" value="$.graphql.hashtag.edge_hashtag_to_media.count"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <!-- Random pause between min_delay_amount 5000 and max_delay_amount 9000 after each request;
               delay_amount belongs to the fixed mode, which is not selected. -->
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay (2)" width="90" x="447" y="34">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="5000"/>
            <parameter key="max_delay_amount" value="9000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <!-- Wiring: loop input to Extract Macro (2) to Enrich Data by Webservice (2) to Delay (2) to loop output 1. -->
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Enrich Data by Webservice (2)" to_port="Example Set"/>
          <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Enrich Data by Webservice (2)" from_port="ExampleSet" to_op="Delay (2)" to_port="through 1"/>
          <connect from_op="Delay (2)" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: only the first chain (Create ExampleSet to Loop Examples) is connected to a result port;
           the operators whose names end in (2) are deactivated in this version. -->
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <!-- Canvas notes. The note at y=151 frames the chain at y=187, which requests profile URLs, so it is the account search;
           the note at y=348 frames the chain at y=391, which requests explore/tags URLs, so it is the keyword search. -->
      <description align="center" color="yellow" colored="false" height="169" resized="true" width="332" x="139" y="151">Account Search</description>
      <description align="center" color="yellow" colored="false" height="171" resized="true" width="328" x="143" y="348">Keyword Search</description>
    </process>
  </operator>
</process>


Best Answer

  • Robi_MeRobi_Me Member Posts: 32 Maven
    Solution Accepted
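    Clear them cookies! Adding a Clear Cookies operator in front of each Enrich Data by Webservice call got the process working again: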

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Seed table for the account-profile lookups: comma separated text with the columns `term` and `search`,
               where `search` holds an Instagram profile URL ending in ?__a=1. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="187">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <!-- &#10; is the XML character reference for a line feed, so the value below is one header line plus three data rows.
                 NOTE(review): the header and the first data row have a space after the comma, the other rows do not;
                 trim_attribute_names is true, but whether data values are trimmed is not visible here. TODO confirm. -->
            <parameter key="input_csv_text" value="term, search&#10;saradioawards, https://www.instagram.com/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/pfizerinc/?__a=1&#10;washington_post,https://www.instagram.com/coveringpotus/?__a=1"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Loops over the incoming example set; iteration_macro names the loop macro `example`, referenced below as %{example}.
               The nested process is: Extract Macro, then Clear Cookies, then Enrich Data by Webservice, then Delay. -->
          <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="187">
            <parameter key="iteration_macro" value="example"/>
            <process expanded="true">
              <!-- Fills macro `search` from attribute `search` at example index %{example};
                   the additional_macros list also fills macro `term` from attribute `term`. -->
              <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
                <parameter key="macro" value="search"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="search"/>
                <parameter key="example_index" value="%{example}"/>
                <list key="additional_macros">
                  <parameter key="term" value="term"/>
                </list>
              </operator>
              <!-- The operator this answer adds: cookies are cleared (cookie_scope global) before each web request in the loop. -->
              <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies" width="90" x="380" y="85">
                <parameter key="cookie_scope" value="global"/>
              </operator>
              <!-- Issues a GET request to the URL held in %{search}; query_type is JsonPath, so the jsonpath_queries list
                   (key = result name, value = JsonPath into the response) is the one that matters here. -->
              <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="514" y="85">
                <parameter key="query_type" value="JsonPath"/>
                <list key="string_machting_queries"/>
                <parameter key="attribute_type" value="Nominal"/>
                <!-- NOTE(review): leftover `foo` entry; presumably ignored while query_type is JsonPath. TODO confirm. -->
                <list key="regular_expression_queries">
                  <parameter key="foo" value=".*"/>
                </list>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <parameter key="ignore_CDATA" value="true"/>
                <parameter key="assume_html" value="true"/>
                <list key="index_queries"/>
                <!-- Every path is rooted at $.graphql.user, i.e. the profile object of the JSON response. -->
                <list key="jsonpath_queries">
                  <parameter key="bio" value="$.graphql.user.biography"/>
                  <parameter key="full_name" value="$.graphql.user.full_name"/>
                  <parameter key="user_id" value="$.graphql.user.id"/>
                  <parameter key="is_business_account" value="$.graphql.user.is_business_account"/>
                  <parameter key="category_name" value="$.graphql.user.business_category_name"/>
                  <parameter key="category_enum" value="$.graphql.user.category_enum"/>
                  <parameter key="category_user" value="$.graphql.user.category_name"/>
                  <parameter key="connected_fb_page" value="$.graphql.user.connected_fb_page"/>
                  <parameter key="edges" value="$.graphql.user.edge_owner_to_timeline_media.edges"/>
                </list>
                <parameter key="request_method" value="GET"/>
                <parameter key="url" value="%{search}"/>
                <parameter key="delay" value="1000"/>
                <list key="request_properties"/>
                <parameter key="encoding" value="SYSTEM"/>
                <!-- NOTE(review): false here, while Enrich Data by Webservice (2) keeps this at true; unclear whether the difference is intended. -->
                <parameter key="keep_sensitive_headers" value="false"/>
              </operator>
              <!-- Random pause between min_delay_amount 10000 and max_delay_amount 12000 after each request;
                   delay_amount belongs to the fixed mode, which is not selected. -->
              <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay" width="90" x="648" y="85">
                <parameter key="delay" value="random"/>
                <parameter key="delay_amount" value="1000"/>
                <parameter key="min_delay_amount" value="10000"/>
                <parameter key="max_delay_amount" value="12000"/>
              </operator>
              <!-- Wiring: loop input to Extract Macro to Clear Cookies to Enrich Data by Webservice to Delay to loop output 1. -->
              <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Clear Cookies" to_port="through 1"/>
              <connect from_op="Clear Cookies" from_port="through 1" to_op="Enrich Data by Webservice" to_port="Example Set"/>
              <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_op="Delay" to_port="through 1"/>
              <connect from_op="Delay" from_port="through 1" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Seed table for the hashtag lookups: comma separated text with the columns `term` and `search`,
               where `search` holds an Instagram explore/tags URL ending in ?__a=1. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="391">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <!-- Rows are separated by the line-feed character reference &#10;. It must not be written with an escaped
                 ampersand, otherwise the literal text ends up inside the URL and all data rows collapse into one.
                 The hashtag in the last URL is written without a space, since a raw space is not valid inside a URL. -->
            <parameter key="input_csv_text" value="term, search&#10;saradioawards, https://www.instagram.com/explore/tags/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/explore/tags/pfizer/?__a=1&#10;washington_post,https://www.instagram.com/explore/tags/washingtonpost/?__a=1"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Hashtag variant of the lookup loop; iteration_macro names the loop macro `example`, referenced below as %{example}.
               The nested process is: Extract Macro (2), then Clear Cookies (2), then Enrich Data by Webservice (2), then Delay (2). -->
          <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples (2)" width="90" x="313" y="391">
            <parameter key="iteration_macro" value="example"/>
            <process expanded="true">
              <!-- Fills macro `search` from attribute `search` at example index %{example};
                   the additional_macros list also fills macro `term` from attribute `term`. -->
              <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="34">
                <parameter key="macro" value="search"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="search"/>
                <parameter key="example_index" value="%{example}"/>
                <list key="additional_macros">
                  <parameter key="term" value="term"/>
                </list>
              </operator>
              <!-- The operator this answer adds: cookies are cleared (cookie_scope global) before each web request in the loop. -->
              <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies (2)" width="90" x="313" y="34">
                <parameter key="cookie_scope" value="global"/>
              </operator>
              <!-- Issues a GET request to the URL held in %{search}; query_type is JsonPath, so the jsonpath_queries list
                   (key = result name, value = JsonPath into the response) is the one that matters here. -->
              <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice (2)" width="90" x="447" y="34">
                <parameter key="query_type" value="JsonPath"/>
                <list key="string_machting_queries"/>
                <parameter key="attribute_type" value="Nominal"/>
                <!-- NOTE(review): leftover `foo` entry; presumably ignored while query_type is JsonPath. TODO confirm. -->
                <list key="regular_expression_queries">
                  <parameter key="foo" value=".*"/>
                </list>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <parameter key="ignore_CDATA" value="true"/>
                <parameter key="assume_html" value="true"/>
                <list key="index_queries"/>
                <!-- Every path is rooted at $.graphql.hashtag: its id, its name and the count under edge_hashtag_to_media. -->
                <list key="jsonpath_queries">
                  <parameter key="search_id" value="$.graphql.hashtag.id"/>
                  <parameter key="search" value="$.graphql.hashtag.name"/>
                  <parameter key="posts" value="$.graphql.hashtag.edge_hashtag_to_media.count"/>
                </list>
                <parameter key="request_method" value="GET"/>
                <parameter key="url" value="%{search}"/>
                <parameter key="delay" value="1000"/>
                <list key="request_properties"/>
                <parameter key="encoding" value="SYSTEM"/>
                <parameter key="keep_sensitive_headers" value="true"/>
              </operator>
              <!-- Random pause between min_delay_amount 5000 and max_delay_amount 9000 after each request;
                   delay_amount belongs to the fixed mode, which is not selected. -->
              <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay (2)" width="90" x="581" y="34">
                <parameter key="delay" value="random"/>
                <parameter key="delay_amount" value="1000"/>
                <parameter key="min_delay_amount" value="5000"/>
                <parameter key="max_delay_amount" value="9000"/>
                <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
              </operator>
              <!-- Wiring: loop input to Extract Macro (2) to Clear Cookies (2) to Enrich Data by Webservice (2) to Delay (2) to loop output 1. -->
              <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
              <connect from_op="Extract Macro (2)" from_port="example set" to_op="Clear Cookies (2)" to_port="through 1"/>
              <connect from_op="Clear Cookies (2)" from_port="through 1" to_op="Enrich Data by Webservice (2)" to_port="Example Set"/>
              <connect from_op="Enrich Data by Webservice (2)" from_port="ExampleSet" to_op="Delay (2)" to_port="through 1"/>
              <connect from_op="Delay (2)" from_port="through 1" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Top-level wiring: the profile chain feeds result 1 and the hashtag chain feeds result 2. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
          <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Loop Examples (2)" to_port="example set"/>
          <connect from_op="Loop Examples (2)" from_port="output 1" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <!-- Canvas notes. The note at y=151 frames the chain at y=187, which requests profile URLs, so it is the account search;
               the note at y=348 frames the chain at y=391, which requests explore/tags URLs, so it is the keyword search. -->
          <description align="center" color="yellow" colored="false" height="169" resized="true" width="332" x="139" y="151">Account Search</description>
          <description align="center" color="yellow" colored="false" height="171" resized="true" width="328" x="143" y="348">Keyword Search</description>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.