commit
						a281c1bcf5
					
				|  | @ -0,0 +1,56 @@ | |||
| Data Setup: | ||||
| ---------- | ||||
| Before you can run the prediction sample prediction.rb, you must load some csv | ||||
| formatted data into Google Storage. You can do this by running setup.sh with a  | ||||
| bucket/object name of your choice. You must first create the bucket you want to  | ||||
| use. This can be done with the gsutil function or via the web UI (Storage  | ||||
| Access) in the Google APIs Console. i.e.: | ||||
| # chmod 744 setup.sh | ||||
| # ./setup.sh BUCKET/OBJECT | ||||
| Note you need gsutil in your path for this to work. | ||||
| 
 | ||||
| In the script, you must then modify the datafile string. This must correspond with the | ||||
| bucket/object of your dataset (if you are using your own dataset). We have | ||||
| provided a setup.sh which will upload some basic sample data. The section is | ||||
| near the bottom of the script, under 'FILL IN DATAFILE' | ||||
| 
 | ||||
| API setup: | ||||
| --------- | ||||
| We need to allow the application to use your API access. Go to APIs Console | ||||
| https://code.google.com/apis/console, and select the project you want, go to API | ||||
| Access, and create an OAuth2 client if you have not yet already. You should | ||||
| generate a client ID and secret.  | ||||
| 
 | ||||
| This example will run through the server-side example, where the application | ||||
| gets authorization ahead of time, which is the normal use case for Prediction | ||||
| API. You can also set it up so the user can grant access. | ||||
| 
 | ||||
| First, run the google-api script to generate access and refresh tokens. Ex. | ||||
| 
 | ||||
| # cd google-api-ruby-client | ||||
| # ruby-1.9.2-p290  bin/google-api oauth-2-login --scope=https://www.googleapis.com/auth/prediction --client-id=NUMBER.apps.googleusercontent.com --client-secret=CLIENT_SECRET | ||||
| 
 | ||||
| Fill in your client-id and client-secret from the API Access page. You will | ||||
| probably have to set a redirect URI in your client ID | ||||
| (ex. http://localhost:12736/). You can do this by hitting 'Edit settings' in the | ||||
| API Access / Client ID section, and adding it to Authorized Redirect URIs. Note | ||||
| that this has to be exactly the same URI; http://localhost:12736 and | ||||
| http://localhost:12736/ are not the same in this case. | ||||
| 
 | ||||
| This should pop up a browser window, where you grant access. This will then | ||||
| generate a ~/.google-api.yaml file. You have two options here: you can either | ||||
| copy the information directly into your code, or you can store this as a file | ||||
| and load it in the sample as a yaml. In this example we do the latter. NOTE: if | ||||
| you are loading it as a yaml, ensure you rename/move the file, as the | ||||
| ~/.google-api.yaml file can get overwritten. The script will work as is if you | ||||
| move the .google-api.yaml file to the sample directory. | ||||
| 
 | ||||
| 
 | ||||
| This sample currently does not cover some newer features of Prediction API such | ||||
| as streaming training, hosted models or class weights. If there are any | ||||
| questions or suggestions to improve the script please email us at | ||||
| prediction-api-discuss@googlegroups.com. | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,219 @@ | |||
| #!/usr/bin/ruby1.8  | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| # Copyright:: Copyright 2011 Google Inc. | ||||
| # License:: All Rights Reserved. | ||||
| # Original Author:: Bob Aman, Winton Davies, Robert Kaplow | ||||
| # Maintainer:: Robert Kaplow (mailto:rkaplow@google.com) | ||||
| 
 | ||||
$:.unshift('lib')
require 'rubygems'
require 'sinatra'
require 'datamapper'
require 'google/api_client'
require 'json'
require 'yaml'
| 
 | ||||
# Keep session data in an in-process pool; sessions expire after one day (86400 s).
use Rack::Session::Pool, :expire_after => 86400

# Token store: an in-memory SQLite database accessed through DataMapper.
DataMapper.setup(:default, 'sqlite::memory:')

##
# A stored set of OAuth 2 token fields, looked up by the id kept in the session.
class TokenPair
  include DataMapper::Resource

  property :id, Serial
  property :refresh_token, String
  property :access_token, String
  property :expires_in, Integer
  property :issued_at, Integer

  ##
  # Copies the token fields of +object+ onto this record. The record is not
  # saved; the caller is responsible for calling save.
  #
  # @param [Object] object Any object responding to refresh_token,
  #                 access_token, expires_in and issued_at.
  def update_token!(object)
    self.refresh_token = object.refresh_token
    self.access_token  = object.access_token
    self.expires_in    = object.expires_in
    self.issued_at     = object.issued_at
  end

  ##
  # Builds a hash of the stored token fields.
  #
  # @return [Hash] The token fields keyed by symbol; :issued_at is converted
  #                from the stored integer to a Time.
  def to_hash
    issued_time = Time.at(issued_at)
    {
      :refresh_token => refresh_token,
      :access_token => access_token,
      :expires_in => expires_in,
      :issued_at => issued_time
    }
  end
end

# Create the table for TokenPair in the (empty) in-memory database.
TokenPair.auto_migrate!
| 
 | ||||
##
# Runs before every request. Builds an API client from the credentials in the
# yaml file, overlays any token saved for the current session, refreshes the
# access token when it has expired, loads the Prediction API description, and
# redirects to the authorization route when no access token is available.
before do

  # FILL IN THIS SECTION
  # This will work if your yaml file is stored as ./.google-api.yaml
  # ------------------------
  oauth_yaml = YAML.load_file('.google-api.yaml')
  @client = Google::APIClient.new
  @client.authorization.client_id = oauth_yaml["client_id"]
  @client.authorization.client_secret = oauth_yaml["client_secret"]
  @client.authorization.scope = oauth_yaml["scope"]
  @client.authorization.refresh_token = oauth_yaml["refresh_token"]
  @client.authorization.access_token = oauth_yaml["access_token"]
  # -----------------------

  @client.authorization.redirect_uri = to('/oauth2callback')

  # Workaround for now as expires_in may be nil, but when converted to int it becomes 0.
  # expires_in is a lifetime in seconds (not a point in time), so assume the
  # token is good for 30 minutes.
  @client.authorization.expires_in = 1800 if @client.authorization.expires_in.to_i == 0

  if session[:token_id]
    # Load the token saved for this session, if the record still exists.
    token_pair = TokenPair.get(session[:token_id])
    @client.authorization.update_token!(token_pair.to_hash) if token_pair
  end
  if @client.authorization.refresh_token && @client.authorization.expired?
    @client.authorization.fetch_access_token!
  end

  @prediction = @client.discovered_api('prediction', 'v1.3')

  # Without an access token, everything except the OAuth 2 routes themselves
  # is sent to the authorization flow.
  unless @client.authorization.access_token || request.path_info =~ /^\/oauth2/
    redirect to('/oauth2authorize')
  end
end
| 
 | ||||
##
# Sends the browser to the authorization URI built by the client, using a
# 303 redirect.
get '/oauth2authorize' do
  authorization_url = @client.authorization.authorization_uri.to_s
  redirect authorization_url, 303
end
| 
 | ||||
##
# OAuth 2 redirect target. Exchanges the authorization code for tokens, stores
# them in a TokenPair, remembers the record id in the session and returns to
# the main page.
get '/oauth2callback' do
  # Hand the authorization code from the redirect to the client so that it is
  # the code (rather than a previously configured refresh token) that gets
  # exchanged for tokens.
  @client.authorization.code = params[:code] if params[:code]
  @client.authorization.fetch_access_token!

  # Persist the token here. Reuse the session's record when it still exists,
  # otherwise start a new one.
  token_pair = (session[:token_id] && TokenPair.get(session[:token_id])) || TokenPair.new
  token_pair.update_token!(@client.authorization)
  token_pair.save
  session[:token_id] = token_pair.id
  redirect to('/')
end
| 
 | ||||
##
# Main page: trains a model on the data file, waits until training has
# finished, then requests one prediction and prints the result to standard
# output. Nothing is rendered on the page.
get '/' do
  # FILL IN DATAFILE:
  # ----------------------------------------
  datafile = "BUCKET/OBJECT"
  # ----------------------------------------

  # FILL IN DESIRED INPUT:
  # ----------------------------------------
  input_features = ["Alice noticed with some surprise."]
  # ----------------------------------------

  # Start training, then block until the model is reported as ready.
  train(datafile)
  if is_done?(datafile)
    prediction, score = get_prediction(datafile, input_features)

    # The results are only written to output; display them on the page if desired.
    puts prediction
    puts score
  end
end
| 
 | ||||
##
# Trains a predictive model.
#
# @param [String] datafile The name of the file in Google Storage. NOTE: this does *not*
#                 include the gs:// part. If the Google Storage path is gs://bucket/object,
#                 then the correct string is "bucket/object"
# @return The value returned by the client's execute call for the training request.
def train(datafile)
  # Serialize with the JSON library so that quotes or backslashes in the name
  # are escaped instead of producing an invalid request body.
  input = JSON.generate({'id' => datafile})
  puts "training input: #{input}"
  @client.execute(@prediction.training.insert,
                  {},
                  input,
                  {'Content-Type' => 'application/json'})
end
| 
 | ||||
##
# Asks the API for the current state of a training job.
#
# @param [String] datafile The name of the file in Google Storage, without the
#                 gs:// part: for gs://bucket/object pass "bucket/object".
# @return [Integer] status The HTTP status code of the training status request.
def get_training_status(datafile)
  request_params = {'data' => datafile}
  # Only the first element of the response (the status) is of interest here.
  status, _headers, _body = @client.execute(@prediction.training.get, request_params)
  status
end
| 
 | ||||
| 
 | ||||
##
# Polls the training status until a model exists. With the default arguments
# this loops forever until the status request answers with 200.
#
# @param [String] datafile The name of the file in Google Storage. NOTE: this does *not*
#                 include the gs:// part. If the Google Storage path is gs://bucket/object,
#                 then the correct string is "bucket/object"
# @param [Integer, nil] max_attempts Maximum number of status checks before giving
#                 up; nil (the default) means keep polling indefinitely.
# @param [Numeric] poll_interval Seconds to sleep between two status checks.
# @return [Bool] exists True if the model exists and can be used for predictions,
#                false if max_attempts checks were made without success.
def is_done?(datafile, max_attempts = nil, poll_interval = 10)
  attempts = 0
  loop do
    status = get_training_status(datafile)
    attempts += 1
    puts "Attempting to check model #{datafile} - Status: #{status} "
    return true if status == 200
    # Give up only when the caller asked for a bound.
    return false if max_attempts && attempts >= max_attempts
    sleep poll_interval
  end
end
| 
 | ||||
| 
 | ||||
| 
 | ||||
##
# Returns the prediction and, for categorization, the score of the most likely class.
#
# @param [String] datafile The name of the file in Google Storage. NOTE: this does *not*
#                 include the gs:// part. If the Google Storage path is gs://bucket/object,
#                 then the correct string is "bucket/object"
# @param [List] input_features A list of input features.
#
# @return [String or Double] prediction The returned prediction, String if categorization,
#                            Double if regression. "error" if a label was returned
#                            without class scores.
# @return [Double] trueclass_score The numeric score of the most likely label
#                  (categorical only); -1 for regression, -1.0 when no score is available.
def get_prediction(datafile,input_features)
  # Put the input features in the request (json) format. Using the JSON library
  # quotes and escapes every feature correctly on all Ruby versions.
  input = JSON.generate({'input' => {'csvInstance' => input_features}})
  puts "Prediction Input: #{input}"
  _status, _headers, body = @client.execute(@prediction.training.predict,
                                            {'data' => datafile},
                                            input,
                                            {'Content-Type' => 'application/json'})
  prediction_data = JSON.parse(body[0])

  label = prediction_data["outputLabel"]
  if label.nil?
    # Regression.
    prediction = prediction_data["outputValue"]
    # Class score unused.
    trueclass_score = -1
  else
    # Categorical: the most likely label is the prediction.
    prediction = label
    # Pull the class probabilities.
    probs = prediction_data["outputMulti"]
    puts probs
    # Verify we are getting a valid result.
    if probs.nil?
      puts ["ERROR", input_features].join("\t")
      return "error", -1.0
    end

    # Extract the score for the most likely class; fall back to -1.0 when the
    # label is missing from the score list instead of failing on nil.
    best = probs.find { |entry| entry["label"] == prediction }
    trueclass_score = best ? best["score"] : -1.0
  end

  puts [prediction,trueclass_score,input_features].join("\t")
  return prediction,trueclass_score
end
| 
 | ||||
|  | @ -0,0 +1,16 @@ | |||
| #!/bin/bash | ||||
| # | ||||
| # Copyright 2011 Google Inc. All Rights Reserved. | ||||
| # Author: rkaplow@google.com (Robert Kaplow) | ||||
| # | ||||
| # Uploads a training data set to Google Storage to be used by this sample | ||||
| # application.  | ||||
| # | ||||
| # Usage: | ||||
| # setup.sh bucket/object  | ||||
| # | ||||
| # Requirements: | ||||
| #   gsutil - a client application for interacting with Google Storage. It | ||||
| #     can be downloaded from https://code.google.com/apis/storage/docs/gsutil.html | ||||
# The destination bucket/object is required; stop with a usage message when
# it is missing instead of calling gsutil with an incomplete gs:// path.
if [ -z "${1:-}" ]; then
  echo "Usage: $0 bucket/object" >&2
  exit 1
fi

OBJECT_NAME="$1"

# Quote the destination so names containing spaces or glob characters are
# passed to gsutil as a single argument.
gsutil cp language_id.txt "gs://${OBJECT_NAME}"
		Loading…
	
		Reference in New Issue