RapidMiner

Enrich Data by Web Service for Twitter

SOLVED
Regular Contributor

Enrich Data by Web Service for Twitter

What am I doing wrong here? It says I cannot connect to the URL:

 

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Read Excel: imports cell range A1:B2364 from sheet 1 of the workbook.
       The two columns are declared below as Name (polynominal) and
       f_id (numeric). Row 0 is annotated as the name row. -->
  <operator activated="true"
            class="read_excel"
            compatibility="7.5.003"
            expanded="true"
            height="68"
            name="Read Excel"
            width="90"
            x="112"
            y="187">
    <parameter key="excel_file" value="/Users/xlsfile.xlsx"/>
    <parameter key="sheet_number" value="1"/>
    <parameter key="imported_cell_range" value="A1:B2364"/>
    <parameter key="encoding" value="SYSTEM"/>
    <parameter key="first_row_as_names" value="false"/>
    <!-- Row annotations: row index 0 carries the attribute names. -->
    <list key="annotations">
      <parameter key="0" value="Name"/>
    </list>
    <parameter key="date_format" value=""/>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <!-- Column meta data, one entry per column index. -->
    <list key="data_set_meta_data_information">
      <parameter key="0" value="Name.true.polynominal.attribute"/>
      <parameter key="1" value="f_id.true.numeric.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Format Numbers: applies the number pattern "0" (no digit grouping) to
       the single attribute f_id.
       NOTE(review): presumably this renders the numeric ID without a
       fractional part so it can be placed in a URL; confirm. -->
  <operator activated="true"
            class="format_numbers"
            compatibility="7.5.003"
            expanded="true"
            height="82"
            name="Format Numbers"
            width="90"
            x="246"
            y="187">
    <!-- Attribute selection: exactly one attribute, f_id. -->
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="f_id"/>
    <parameter key="attributes" value=""/>
    <parameter key="use_except_expression" value="false"/>
    <parameter key="value_type" value="numeric"/>
    <parameter key="use_value_type_exception" value="false"/>
    <parameter key="except_value_type" value="real"/>
    <parameter key="block_type" value="value_series"/>
    <parameter key="use_block_type_exception" value="false"/>
    <parameter key="except_block_type" value="value_series_end"/>
    <parameter key="invert_selection" value="false"/>
    <parameter key="include_special_attributes" value="false"/>
    <!-- Output format: explicit pattern, US locale, grouping disabled. -->
    <parameter key="format_type" value="pattern"/>
    <parameter key="pattern" value="0"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="use_grouping" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Enrich Data by Webservice: issues one GET request per example to the
       Twitter v1.1 users/lookup endpoint, substituting the f_id attribute
       into the URL, and is configured to evaluate the response with JsonPath.

       Fixes relative to the originally posted fragment:
         * the query parameter is user_id (not userid); it accepts numeric
           IDs only, so the trailing screen name ",twitter" was removed;
         * the request header is spelled Authorization (not Authorisation);
         * the header value had been pasted inside itself and contained raw
           "<" characters, which made the XML not well-formed; it is now a
           single OAuth parameter list with the placeholders escaped.

       NOTE(review): OAuth 1.0a requires a fresh nonce, timestamp and
       signature for every request, so a static header value such as the one
       below is expected to be rejected; the values shown are placeholders
       and sample values that must be replaced.
       NOTE(review): jsonpath_queries is empty, so no attributes are
       extracted from the response; add queries as needed. -->
  <operator activated="true"
            class="web:enrich_data_by_webservice"
            compatibility="7.3.000"
            expanded="true"
            height="68"
            name="Enrich Data by Webservice"
            width="90"
            x="514"
            y="187">
    <parameter key="query_type" value="JsonPath"/>
    <list key="string_machting_queries"/>
    <parameter key="attribute_type" value="Nominal"/>
    <list key="regular_expression_queries"/>
    <list key="regular_region_queries"/>
    <list key="xpath_queries"/>
    <list key="namespaces"/>
    <parameter key="ignore_CDATA" value="true"/>
    <parameter key="assume_html" value="true"/>
    <list key="index_queries"/>
    <list key="jsonpath_queries"/>
    <parameter key="request_method" value="GET"/>
    <parameter key="url" value="https://api.twitter.com/1.1/users/lookup.json?user_id=&lt;%f_id%&gt;"/>
    <parameter key="delay" value="0"/>
    <list key="request_properties">
      <parameter key="Authorization" value="OAuth oauth_consumer_key=&quot;&lt;mykey&gt;&quot;, oauth_nonce=&quot;&lt;mytoken&gt;&quot;, oauth_signature=&quot;tnnArxj06cWHq44gCs1OSKk%2FjLY%3D&quot;, oauth_signature_method=&quot;HMAC-SHA1&quot;, oauth_timestamp=&quot;1318622958&quot;, oauth_token=&quot;&lt;mytoken&gt;&quot;, oauth_version=&quot;1.0&quot;"/>
    </list>
    <parameter key="encoding" value="SYSTEM"/>
  </operator>
</process>

4 REPLIES
Community Manager

Re: Enrich Data by Web Service for Twitter

hi @robin - can you please repost that XML directly from your RapidMiner Studio XML panel and into a "code" pane here (use the </> button)?  The XML you posted looks like three processes mixed together.  Thanks.

 

Scott

Scott Genzer
Senior Community Manager
RapidMiner, Inc.
Highlighted
Regular Contributor

Re: Enrich Data by Web Service for Twitter

 

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Read CSV: loads the semicolon-separated, UTF-8 encoded file
       t_person2.csv. Only column 0 is declared in the meta data below,
       as t_person (polynominal). -->
  <operator activated="true"
            class="read_csv"
            compatibility="7.5.003"
            expanded="true"
            height="68"
            name="Read CSV"
            width="90"
            x="112"
            y="391">
    <parameter key="csv_file" value="/Users/Robin/Dropbox/personality/t_person2.csv"/>
    <!-- Tokenising: ";" separates columns, double quotes delimit values,
         backslash escapes. -->
    <parameter key="column_separators" value=";"/>
    <parameter key="trim_lines" value="false"/>
    <parameter key="use_quotes" value="true"/>
    <parameter key="quotes_character" value="&quot;"/>
    <parameter key="escape_character" value="\"/>
    <parameter key="skip_comments" value="false"/>
    <parameter key="comment_characters" value="#"/>
    <!-- Number parsing: "." is the decimal mark, digit grouping is off. -->
    <parameter key="parse_numbers" value="true"/>
    <parameter key="decimal_character" value="."/>
    <parameter key="grouped_digits" value="false"/>
    <parameter key="grouping_character" value=","/>
    <parameter key="date_format" value=""/>
    <parameter key="first_row_as_names" value="false"/>
    <!-- Row annotations: row index 0 carries the attribute names. -->
    <list key="annotations">
      <parameter key="0" value="Name"/>
    </list>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="encoding" value="UTF-8"/>
    <list key="data_set_meta_data_information">
      <parameter key="0" value="t_person.true.polynominal.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Enrich Data by Webservice: sends one POST request per example to the
       Receptiviti Twitter-import endpoint with JSON content negotiation
       headers, and is configured to evaluate the response with JsonPath.

       The API key and secret key that were hard-coded in this fragment have
       been replaced with placeholders: credentials must not be stored in a
       shared or published process. Substitute your own values before
       running, and revoke any key that has already been published.

       NOTE(review): jsonpath_queries is empty, so no attributes are
       extracted from the response; add queries as needed.
       NOTE(review): no request body is configured in this fragment although
       the method is POST; confirm against the service's API. -->
  <operator activated="true"
            class="web:enrich_data_by_webservice"
            compatibility="7.3.000"
            expanded="true"
            height="68"
            name="Enrich Data by Webservice"
            width="90"
            x="246"
            y="391">
    <parameter key="query_type" value="JsonPath"/>
    <list key="string_machting_queries"/>
    <parameter key="attribute_type" value="Nominal"/>
    <list key="regular_expression_queries"/>
    <list key="regular_region_queries"/>
    <list key="xpath_queries"/>
    <list key="namespaces"/>
    <parameter key="ignore_CDATA" value="true"/>
    <parameter key="assume_html" value="true"/>
    <list key="index_queries"/>
    <list key="jsonpath_queries"/>
    <parameter key="request_method" value="POST"/>
    <parameter key="url" value="https://app.receptiviti.com/v2/api/import/twitter/user"/>
    <parameter key="delay" value="0"/>
    <list key="request_properties">
      <parameter key="Accept" value="application/hal+json"/>
      <parameter key="X-API-KEY" value="&lt;mykey&gt;"/>
      <parameter key="X-API-SECRET-KEY" value="&lt;mysecret&gt;"/>
      <parameter key="Content-Type" value="application/json"/>
    </list>
    <parameter key="encoding" value="SYSTEM"/>
  </operator>
</process>

The data for the file is as follows: 

 

t_users t_person
RobinMeisel 5992cd00779c6f06acb70810
Clarke_May 59a662c4c38b8006d67a51a1
ethelrecruit 59a6852780665106cfb4c357
dylanpcotter 59a5726cc38b8006d87a519f
DeborahHass 599d794d8f95b006d0ee2be9
s_p_i_k_e_s 599d64322971f906d0646c2c
SLR_Goncalves 599d637414cf0e06d9de9807

 

I have left the API key and secret in the post to facilitate the answer; I will edit the post and remove them once we have worked this out. 

Moderator

Re: Enrich Data by Web Service for Twitter

There is still something wrong with the XML; it looks like it's nesting multiple processes.

Regular Contributor
Solution
Accepted by topic author robin
3 weeks ago

Re: Enrich Data by Web Service for Twitter

Solved the problem: after the Data to Documents operator, it is important to use the Combine Documents operator. Once combined, the documents need to be read back in to continue processing. 

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Execute Program: runs the shell script ./tw_imp_id.sh from the
       import_id working directory, with both stdout and stderr logged.
       No extra environment variables are set. -->
  <operator activated="true"
            class="productivity:execute_program"
            compatibility="7.5.003"
            expanded="true"
            height="103"
            name="Execute Program (2)"
            width="90"
            x="45"
            y="85">
    <parameter key="command" value="./tw_imp_id.sh"/>
    <parameter key="log_stdout" value="true"/>
    <parameter key="log_stderr" value="true"/>
    <!-- The relative command above is resolved against this directory. -->
    <parameter key="working_directory" value="/Users/Robin/Dropbox/import_id"/>
    <list key="env_variables"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Read Document: reads a document with content type txt in the system
       encoding, keeping the full content (extract_text_only is off).
       NOTE(review): no file parameter is set in this fragment; presumably
       the input arrives through the operator's input port. Confirm. -->
  <operator activated="true"
            class="text:read_document"
            compatibility="7.5.000"
            expanded="true"
            height="68"
            name="Read Document (2)"
            width="90"
            x="179"
            y="85">
    <parameter key="extract_text_only" value="false"/>
    <parameter key="use_file_extension_as_type" value="true"/>
    <parameter key="content_type" value="txt"/>
    <parameter key="encoding" value="SYSTEM"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- JSON To Data: converts JSON documents into an example set. Arrays are
       not ignored, the number of attributes is not limited, and invalid
       documents are not skipped. -->
  <operator activated="true"
            class="text:json_to_data"
            compatibility="7.5.000"
            expanded="true"
            height="82"
            name="JSON To Data (2)"
            width="90"
            x="313"
            y="85">
    <parameter key="ignore_arrays" value="false"/>
    <parameter key="limit_attributes" value="false"/>
    <parameter key="skip_invalid_documents" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Data to Documents: turns examples into documents using an explicit
       attribute selection; only the attribute "id" is listed, with
       weight 1.0. -->
  <operator activated="true"
            class="text:data_to_documents"
            compatibility="7.5.000"
            expanded="true"
            height="68"
            name="Data to Documents"
            width="90"
            x="447"
            y="85">
    <parameter key="select_attributes_and_weights" value="true"/>
    <!-- Attribute name mapped to its weight. -->
    <list key="specify_weights">
      <parameter key="id" value="1.0"/>
    </list>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Combine Documents: used with no parameters set in this fragment.
       The accepted answer identifies this operator as the step that fixed
       the process. -->
  <operator activated="true"
            class="text:combine_documents"
            compatibility="7.5.000"
            expanded="true"
            height="82"
            name="Combine Documents"
            width="90"
            x="581"
            y="85"/>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Write Document: writes the incoming document to the path below in the
       system encoding, overwriting any existing file. -->
  <operator activated="true"
            class="text:write_document"
            compatibility="7.5.000"
            expanded="true"
            height="82"
            name="Write Document"
            width="90"
            x="715"
            y="85">
    <parameter key="file" value="/Users/Robin/test.csv.*"/>
    <parameter key="overwrite" value="true"/>
    <parameter key="encoding" value="SYSTEM"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Documents to Data: converts documents back into an example set. Both
       the text attribute and the label attribute are named "text", and
       document meta information is added. -->
  <operator activated="true"
            class="text:documents_to_data"
            compatibility="7.5.000"
            expanded="true"
            height="82"
            name="Documents to Data"
            width="90"
            x="849"
            y="85">
    <parameter key="text_attribute" value="text"/>
    <parameter key="label_attribute" value="text"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Select Attributes: the filter type is regular_expression with the
       pattern .*_id; the selection is not inverted and special attributes
       are not included. -->
  <operator activated="true"
            class="select_attributes"
            compatibility="7.5.003"
            expanded="true"
            height="82"
            name="Select Attributes (3)"
            width="90"
            x="983"
            y="85">
    <parameter key="attribute_filter_type" value="regular_expression"/>
    <parameter key="attribute" value=""/>
    <parameter key="attributes" value="[0]._id"/>
    <!-- The pattern for the regular_expression filter type chosen above. -->
    <parameter key="regular_expression" value=".*_id"/>
    <parameter key="use_except_expression" value="false"/>
    <parameter key="value_type" value="attribute_value"/>
    <parameter key="use_value_type_exception" value="false"/>
    <parameter key="except_value_type" value="time"/>
    <parameter key="block_type" value="attribute_block"/>
    <parameter key="use_block_type_exception" value="true"/>
    <parameter key="except_block_type" value="attribute_block"/>
    <parameter key="invert_selection" value="false"/>
    <parameter key="include_special_attributes" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Transpose: used with no parameters set in this fragment; it is the
       last operator posted in the accepted answer. -->
  <operator activated="true"
            class="transpose"
            compatibility="7.5.003"
            expanded="true"
            height="82"
            name="Transpose"
            width="90"
            x="1117"
            y="85"/>
</process>