#!/usr/bin/env python # coding: utf-8 # Get a temporary access token here: # # https://developers.facebook.com/tools/accesstoken/ # # https://techcrunch.com/2015/04/28/facebook-api-shut-down/ # # Mining the Social Web, 2nd Edition # # ## Chapter 2: Mining Facebook: Analyzing Fan Pages, Examining Friendships, and More # # This IPython Notebook provides an interactive way to follow along with and explore the numbered examples from [_Mining the Social Web (2nd Edition)_](http://bit.ly/135dHfs). The intent behind this notebook is to reinforce the concepts from the sample code in a fun, convenient, and effective way. This notebook assumes that you are reading along with the book and have the context of the discussion as you work through these exercises. # # In the somewhat unlikely event that you've somehow stumbled across this notebook outside of its context on GitHub, [you can find the full source code repository here](http://bit.ly/16kGNyb). # # ## Copyright and Licensing # # You are free to use or adapt this notebook for any purpose you'd like. However, please respect the [Simplified BSD License](https://github.com/ptwobrussell/Mining-the-Social-Web-2nd-Edition/blob/master/LICENSE.txt) that governs its use. # # Facebook API Access # # Facebook implements OAuth 2.0 as its standard authentication mechanism, but provides a convenient way for you to get an _access token_ for development purposes, and we'll opt to take advantage of that convenience in this notebook. For details on implementing an OAuth flow with Facebook (all from within IPython Notebook), see the \_AppendixB notebook from the [IPython Notebook Dashboard](/). # # For this first example, login to your Facebook account and go to https://developers.facebook.com/tools/explorer/ to obtain and set permissions for an access token that you will need to define in the code cell defining the ACCESS_TOKEN variable below. # # Be sure to explore the permissions that are available by clicking on the "Get Access Token" button that's on the page and exploring all of the tabs available. For example, you will need to set the "friends_likes" option under the "Friends Data Permissions" since this permission is used by the script below but is not a basic permission and is not enabled by default. # #
# In[ ]: # Copy and paste in the value you just got from the inline frame into this variable and execute this cell. # Keep in mind that you could have just gone to https://developers.facebook.com/tools/access_token/ # and retrieved the "User Token" value from the Access Token Tool ACCESS_TOKEN = '' # ## Example 1. Making Graph API requests over HTTP # In[ ]: import requests # pip install requests import json base_url = 'https://graph.facebook.com/me' # Get 10 likes for 10 friends fields = 'id,name,friends.limit(10).fields(likes.limit(10))' url = '%s?fields=%s&access_token=%s' % \ (base_url, fields, ACCESS_TOKEN,) # This API is HTTP-based and could be requested in the browser, # with a command line utlity like curl, or using just about # any programming language by making a request to the URL. # Click the hyperlink that appears in your notebook output # when you execute this code cell to see for yourself... print(url) # Interpret the response as JSON and convert back # to Python data structures content = requests.get(url).json() # Pretty-print the JSON and display it print(json.dumps(content, indent=1)) # Note: If you attempt to run a query for all of your friends' likes and it appears to hang, it is probably because you have a lot of friends who have a lot of likes. If this happens, you may need to add limits and offsets to the fields in the query as described in Facebook's [field expansion](https://developers.facebook.com/docs/reference/api/field_expansion/) documentation. However, the facebook library that we'll use in the next example handles some of these issues, so it's recommended that you hold off and try it out first. This initial example is just to illustrate that Facebook's API is built on top of HTTP. # # A couple of field limit/offset examples that illustrate the possibilities follow: # # # fields = 'id,name,friends.limit(10).fields(likes)' # Get all likes for 10 friends # fields = 'id,name,friends.offset(10).limit(10).fields(likes)' # Get all likes for 10 more friends # fields = 'id,name,friends.fields(likes.limit(10))' # Get 10 likes for all friends # # ## Example 2. Querying the Graph API with Python # In[ ]: import facebook # pip install facebook-sdk import json # A helper function to pretty-print Python objects as JSON def pp(o): print(json.dumps(o, indent=1)) # Create a connection to the Graph API with your access token g = facebook.GraphAPI(ACCESS_TOKEN, version='2.7') # Execute a few sample queries print('---------------') print('Me') print('---------------') pp(g.get_object('me')) print() print('---------------') print('My Friends') print('---------------') pp(g.get_connections('me', 'friends')) print() print('---------------') print('Social Web') print('---------------') pp(g.request("search", {'q' : 'social web', 'type' : 'page'})) # ## Example 3. Results for a Graph API query for Mining the Social Web # In[ ]: # Get an instance of Mining the Social Web # Using the page name also works if you know it. # e.g. 'MiningTheSocialWeb' or 'CrossFit' mtsw_id = '146803958708175' pp(g.get_object(mtsw_id)) # ## Example 4. Querying the Graph API for Open Graph objects by their URLs # In[ ]: # MTSW catalog link pp(g.get_object('http://shop.oreilly.com/product/0636920030195.do')) # PCI catalog link pp(g.get_object('http://shop.oreilly.com/product/9780596529321.do')) # ## Example 5. Comparing likes between Coke and Pepsi fan pages # In[ ]: # Find Pepsi and Coke in search results pp(g.request('search', {'q' : 'pepsi', 'type' : 'page', 'limit' : 5})) pp(g.request('search', {'q' : 'coke', 'type' : 'page', 'limit' : 5})) # Use the ids to query for likes pepsi_id = '56381779049' # Could also use 'PepsiUS' coke_id = '40796308305' # Could also use 'CocaCola' # A quick way to format integers with commas every 3 digits def int_format(n): return "{:,}".format(n) print("Pepsi likes:", int_format(g.get_object(id=pepsi_id, fields=['fan_count'])['fan_count'])) print("Coke likes:", int_format(g.get_object(id=coke_id, fields=['fan_count'])['fan_count'])) # ## Example 6. Querying a page for its "feed" # # When querying the 'feed' you get all the posts made by the page owner and visitors. If you query only 'posts', you just get the page owner's posts. # In[ ]: pp(g.get_connections(pepsi_id, 'posts')) pp(g.get_connections(coke_id, 'posts')) # ## Example 7. Counting total number of page fans and retrieving feed # In[ ]: # Take, for example, three popular musicians and their page IDs. # Other artists' page IDs can be retrieved using the same search # tools as in the Coke vs Pepsi example. taylor_swift_id = '19614945368' drake_id = '83711079303' beyonce_id = '28940545600' # Declare a helper function for retrieving the total number of # fans ('likes') a page has def get_total_fans(page_id): return int(g.get_object(id=page_id, fields=['fan_count'])['fan_count']) # Declare a helper function for retrieving the official feed from # a given page. def retrieve_page_feed(page_id, n_posts): """Retrieve the first n_posts from a page's feed in reverse chronological order.""" feed = g.get_connections(page_id, 'posts') posts = [] posts.extend(feed['data']) if len(posts) > n_posts: posts = posts[:n_posts] while len(posts) < n_posts: try: feed = requests.get(feed['paging']['next']).json() posts.extend(feed['data']) except KeyError: # When there are no more posts in the feed, break print('Reached end of feed.') break print('{} items retrieved from feed'.format(len(posts))) return posts # Declare a helper function for returning the message content # of a post def get_post_message(post): try: message = post['story'] except KeyError: # Post may have 'message' instead of 'story' pass try: message = post['message'] except KeyError: # Post has neither message = '' return message tswift_fans = get_total_fans(taylor_swift_id) drake_fans = get_total_fans(drake_id) beyonce_fans = get_total_fans(beyonce_id) print('Taylor Swift: {0} fans on Facebook'.format(int_format(tswift_fans))) print('Drake: {0} fans on Facebook'.format(int_format(drake_fans))) print('Beyoncé: {0} fans on Facebook'.format(int_format(beyonce_fans))) # Retrieve the last 5 items from their feeds for artist in [taylor_swift_id, drake_id, beyonce_id]: print() feed = retrieve_page_feed(artist, 5) for i, post in enumerate(feed): message = get_post_message(post)[:50] print('{0} - {1}...'.format(i+1, message)) # ## Example 8. Measuring engagement # In[ ]: # Measure the reaction to a post in terms of likes, shares, and comments def measure_reaction(post_id): """Returns the number of likes, shares, and comments on a given post as a measure of user engagement.""" likes = g.get_object(id=post_id, fields=['likes.limit(0).summary(true)'])\ ['likes']['summary']['total_count'] shares = g.get_object(id=post_id, fields=['shares.limit(0).summary(true)'])\ ['shares']['count'] comments = g.get_object(id=post_id, fields=['comments.limit(0).summary(true)'])\ ['comments']['summary']['total_count'] return likes, shares, comments # Measure the relative share of a page's fans engaging with a post def measure_engagement(post_id, total_fans): """Returns the number of likes, shares, and comments on a given post as a measure of user engagement.""" likes = g.get_object(id=post_id, fields=['likes.limit(0).summary(true)'])\ ['likes']['summary']['total_count'] shares = g.get_object(id=post_id, fields=['shares.limit(0).summary(true)'])\ ['shares']['count'] comments = g.get_object(id=post_id, fields=['comments.limit(0).summary(true)'])\ ['comments']['summary']['total_count'] likes_pct = likes / total_fans * 100.0 shares_pct = shares / total_fans * 100.0 comments_pct = comments / total_fans * 100.0 return likes_pct, shares_pct, comments_pct # Retrieve the last 5 items from the artists' feeds, print the # reaction and the degree of engagement artist_dict = {'Taylor Swift': taylor_swift_id, 'Drake': drake_id, 'Beyoncé': beyonce_id} for name, page_id in artist_dict.items(): print() print(name) print('------------') feed = retrieve_page_feed(page_id, 5) total_fans = get_total_fans(page_id) for i, post in enumerate(feed): message = get_post_message(post)[:30] post_id = post['id'] likes, shares, comments = measure_reaction(post_id) likes_pct, shares_pct, comments_pct = measure_engagement(post_id, total_fans) print('{0} - {1}...'.format(i+1, message)) print(' Likes {0} ({1:7.5f}%)'.format(likes, likes_pct)) print(' Shares {0} ({1:7.5f}%)'.format(shares, shares_pct)) print(' Comments {0} ({1:7.5f}%)'.format(comments, comments_pct)) # ## Example 9. Visualizing a musician's Facebook fan engagement # In[ ]: import pandas as pd import numpy as np import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # In[ ]: # Create a Pandas DataFrame to contain artist page # feed information columns = ['Name', 'Total Fans', 'Post Number', 'Post Date', 'Headline', 'Likes', 'Shares', 'Comments', 'Rel. Likes', 'Rel. Shares', 'Rel. Comments'] musicians = pd.DataFrame(columns=columns) # Build the DataFrame by adding the last 10 posts and their audience # reaction for each of the artists for page_id in [taylor_swift_id, drake_id, beyonce_id]: name = g.get_object(id=page_id)['name'] fans = get_total_fans(page_id) feed = retrieve_page_feed(page_id, 10) for i, post in enumerate(feed): likes, shares, comments = measure_reaction(post['id']) likes_pct, shares_pct, comments_pct = measure_engagement(post['id'], fans) musicians = musicians.append({'Name': name, 'Total Fans': fans, 'Post Number': i+1, 'Post Date': post['created_time'], 'Headline': get_post_message(post), 'Likes': likes, 'Shares': shares, 'Comments': comments, 'Rel. Likes': likes_pct, 'Rel. Shares': shares_pct, 'Rel. Comments': comments_pct, }, ignore_index=True) # Fix the dtype of a few columns for col in ['Post Number', 'Total Fans', 'Likes', 'Shares', 'Comments']: musicians[col] = musicians[col].astype(int) # Show a preview of the DataFrame musicians.head() # In[ ]: musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Likes', kind='bar') musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Shares', kind='bar') musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Comments', kind='bar') # In[ ]: musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Rel. Likes', kind='bar') musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Rel. Shares', kind='bar') musicians[musicians['Name'] == 'Drake'].plot(x='Post Number', y='Rel. Comments', kind='bar') # ## Example 10. Comparing different artists to each other # In[ ]: # Reset the index to a multi-index musicians = musicians.set_index(['Name','Post Number']) # In[ ]: # The unstack method pivots the index labels # and lets you get data columns grouped by artist musicians.unstack(level=0)['Likes'] # In[ ]: # Plot the comparative reactions to each artist's last 10 Facebook posts ax = musicians.unstack(level=0)['Likes'].plot(kind='bar', subplots=False, figsize=(10,5), width=0.8) ax.set_xlabel('10 Latest Posts') ax.set_ylabel('Number of Likes Received') # In[ ]: # Plot the engagement of each artist's Facebook fan base to the last 10 posts ax = musicians.unstack(level=0)['Rel. Likes'].plot(kind='bar', subplots=False, figsize=(10,5), width=0.8) ax.set_xlabel('10 Latest Posts') ax.set_ylabel('Likes / Total Fans (%)') # ## Example 11. Calculate average engagement # In[ ]: print('Average Likes / Total Fans') print(musicians.unstack(level=0)['Rel. Likes'].mean()) print('\nAverage Shares / Total Fans') print(musicians.unstack(level=0)['Rel. Shares'].mean()) print('\nAverage Comments / Total Fans') print(musicians.unstack(level=0)['Rel. Comments'].mean()) # In[ ]: