‎09-20-2017 06:56 PM

[Image: 1200px-Wikipedia-logo-v2-en.svg.png]

This is a quick article about how to use the Enrich Data via Webservice operator (found in the Web Mining extension) to get information about Wikipedia via their REST API webservice.  This API can find many different sources of information such as page views, formula grabs, unique device counts, etc.  Full documentation can be found here: https://wikimedia.org/api/rest_v1


This particular API is VERY easy to use - there is no authentication and the only limitation is a 200 query count per day.  Simply enter the URL, insert the relevant attributes or macros, and set up the JSON paths to organize the output.  Boom.


This is an example of a short process that checks the page count of the RapidMiner Wikipedia page (of course) for the day prior to when the process is executed.



<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: look up the Wikipedia page view record for the
  "RapidMiner" article for the day before the process is run.

  Pipeline (see the connect elements at the bottom):
    Generate Data by User Specification
      to Date to Nominal
      to Date to Nominal (2)
      to Enrich Data by Webservice
      to process result 1
-->
<process version="7.6.001">
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Build the date range: startdate is "now minus one day" and enddate
           is set equal to startdate, so the range covers a single day. -->
      <operator activated="true" class="generate_data_user_specification" compatibility="7.6.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="85">
        <list key="attribute_values">
          <parameter key="startdate" value="date_add(date_now(),-1,DATE_UNIT_DAY)"/>
          <parameter key="enddate" value="startdate"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Render startdate as text using the yyyyMMdd pattern so it can be
           placed in the request URL below. -->
      <operator activated="true" class="date_to_nominal" compatibility="7.6.001" expanded="true" height="82" name="Date to Nominal" width="90" x="179" y="85">
        <parameter key="attribute_name" value="startdate"/>
        <parameter key="date_format" value="yyyyMMdd"/>
      </operator>
      <!-- Same conversion for enddate. -->
      <operator activated="true" class="date_to_nominal" compatibility="7.6.001" expanded="true" height="82" name="Date to Nominal (2)" width="90" x="313" y="85">
        <parameter key="attribute_name" value="enddate"/>
        <parameter key="date_format" value="yyyyMMdd"/>
      </operator>
      <!-- Query the Wikimedia REST API. The placeholders in the url parameter
           name the startdate and enddate attributes prepared above. Each
           entry in jsonpath_queries maps an output attribute name (key) to
           the JsonPath expression (value) used to read it from the response. -->
      <operator activated="true" class="web:enrich_data_by_webservice" compatibility="7.3.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="447" y="85">
        <parameter key="query_type" value="JsonPath"/>
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries"/>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <list key="jsonpath_queries">
          <parameter key="project" value="$..project"/>
          <parameter key="article" value="$..article"/>
          <parameter key="granularity" value="$..granularity"/>
          <parameter key="timestamp" value="$..timestamp"/>
          <parameter key="access" value="$..access"/>
          <parameter key="agent" value="$..agent"/>
          <parameter key="views" value="$..views"/>
        </list>
        <parameter key="url" value="https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/RapidMiner/daily/&lt;%startdate%&gt;/&lt;%enddate%&gt;"/>
        <list key="request_properties"/>
      </operator>
      <!-- Wiring between the operators, ending at the process result port. -->
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
      <connect from_op="Date to Nominal" from_port="example set output" to_op="Date to Nominal (2)" to_port="example set input"/>
      <connect from_op="Date to Nominal (2)" from_port="example set output" to_op="Enrich Data by Webservice" to_port="Example Set"/>
      <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>








Scott Genzer
Senior Community Manager
RapidMiner, Inc.