#!/usr/bin/env python
# coding: utf-8

# # Mining the Social Web
#
# ## Mining Facebook
#
# This Jupyter Notebook provides an interactive way to follow along with the
# video lectures. The intent behind this notebook is to reinforce the concepts
# in a fun, convenient, and effective way.

# ## Facebook API Access
#
# Facebook implements OAuth 2.0 as its standard authentication mechanism, but
# provides a convenient way for you to get an _access token_ for development
# purposes, and we'll opt to take advantage of that convenience in this
# notebook.
#
# To get started, log in to your Facebook account and go to
# https://developers.facebook.com/tools/explorer/ to obtain an ACCESS_TOKEN,
# and then paste it into the code cell below.

# In[ ]:

# Copy and paste in the value you just got from the inline frame into this
# variable and execute this cell. Keep in mind that you could have just gone
# to https://developers.facebook.com/tools/access_token/ and retrieved the
# "User Token" value from the Access Token Tool.
ACCESS_TOKEN = ''

# ## Example 1. Making Graph API requests over HTTP

# In[ ]:

import json

import requests  # pip install requests

base_url = 'https://graph.facebook.com/me'

# Specify which fields to retrieve
fields = 'id,name,likes.limit(10){about}'

# Assemble the request URL: endpoint, requested fields, then the token.
url = '{base}?fields={fields}&access_token={token}'.format(
    base=base_url,
    fields=fields,
    token=ACCESS_TOKEN,
)

# This API is HTTP-based and could be requested in the browser, with a
# command line utility like curl, or using just about any programming
# language by making a request to the URL. Click the hyperlink that appears
# in your notebook output when you execute this code cell to see for
# yourself...
print(url)

# Interpret the response as JSON and convert back to Python data structures
content = requests.get(url).json()

# Pretty-print the JSON and display it
print(json.dumps(content, indent=1))

# ## Example 2.
# Querying the Graph API with Python
#
# Facebook SDK for Python API reference:
# http://facebook-sdk.readthedocs.io/en/v2.0.0/api.html

# In[ ]:

import facebook  # pip install facebook-sdk
import json


def pp(o):
    """Print ``o`` serialized as JSON with a one-space indent."""
    rendered = json.dumps(o, indent=1)
    print(rendered)


# Create a connection to the Graph API with your access token
g = facebook.GraphAPI(ACCESS_TOKEN, version='2.7')

# Execute a few example queries:

# Get my ID
pp(g.get_object('me'))

# In[ ]:

# Get the connections to an ID
# Example connection names: 'feed', 'likes', 'groups', 'posts'
pp(g.get_connections(id='me', connection_name='likes'))

# In[ ]:

# Search for a location, may require approved app
pp(g.request(
    "search",
    {
        'type': 'place',
        'center': '40.749444, -73.968056',
        'fields': 'name, location',
    },
))

# ## Example 3. Querying the Graph API for Mining the Social Web and Counting Fans

# In[ ]:

# Search for a page's ID by name
pp(g.request(
    "search",
    {
        'q': 'Mining the Social Web',
        'type': 'page',
    },
))

# Grab the ID for the book and check the number of fans
mtsw_id = '146803958708175'
pp(g.get_object(id=mtsw_id, fields=['fan_count']))

# ## Example 4. Querying the Graph API for Open Graph objects by their URLs

# In[ ]:

# MTSW catalog link
pp(g.get_object('http://shop.oreilly.com/product/0636920030195.do'))

# PCI catalog link
pp(g.get_object('http://shop.oreilly.com/product/9780596529321.do'))

# ## Example 5. Counting total number of page fans

# In[ ]:

# The following code may require the developer's app be submitted for review
# and approved. See https://developers.facebook.com/docs/apps/review

# Take, for example, three popular musicians and their page IDs.
taylor_swift_id = '19614945368'
drake_id = '83711079303'
beyonce_id = '28940545600'


def get_total_fans(page_id):
    """Return the total number of fans ('likes') a page has, as an int.

    Reads the ``fan_count`` field of the page object through the
    module-level Graph API connection ``g``.
    """
    return int(g.get_object(id=page_id, fields=['fan_count'])['fan_count'])


tswift_fans = get_total_fans(taylor_swift_id)
drake_fans = get_total_fans(drake_id)
beyonce_fans = get_total_fans(beyonce_id)

print('Taylor Swift: {0} fans on Facebook'.format(tswift_fans))
print('Drake: {0} fans on Facebook'.format(drake_fans))
print('Beyoncé: {0} fans on Facebook'.format(beyonce_fans))

# ## Example 6. Retrieving a page's feed

# In[ ]:


def retrieve_page_feed(page_id, n_posts):
    """Retrieve up to ``n_posts`` posts from a page's 'posts' connection.

    Posts are kept in the order the Graph API returns them (expected to be
    newest first). Further pages are fetched by following the
    ``paging.next`` URL until enough posts are collected or no next page is
    advertised.

    Args:
        page_id: ID of the page whose posts are requested.
        n_posts: Maximum number of posts to return.

    Returns:
        A list of at most ``n_posts`` post dicts.
    """
    feed = g.get_connections(page_id, 'posts')
    posts = list(feed['data'])

    while len(posts) < n_posts:
        # Look the next-page URL up explicitly instead of relying on a
        # KeyError, so only a genuinely missing link ends the pagination.
        next_url = feed.get('paging', {}).get('next')
        if not next_url:
            # When there are no more posts in the feed, stop
            print('Reached end of feed.')
            break
        feed = requests.get(next_url).json()
        # A response without 'data' contributes nothing; the loop then ends
        # on the next pass because it carries no paging link either.
        posts.extend(feed.get('data', []))

    # The last page fetched may overshoot the requested count.
    posts = posts[:n_posts]

    print('{} items retrieved from feed'.format(len(posts)))
    return posts


def get_post_message(post):
    """Return the text of a post on a single line.

    Uses the post's 'message' when that key is present, otherwise falls
    back to its 'story', otherwise returns an empty string. Newlines are
    replaced with spaces.

    The previous implementation always overwrote a found 'story' with ''
    whenever 'message' was missing, so story-only posts came back empty.
    """
    if 'message' in post:
        text = post['message']
    else:
        # Post may have 'story' instead of 'message', or neither
        text = post.get('story', '')
    return text.replace('\n', ' ')


# Retrieve the last 5 items from their feeds
for artist in [taylor_swift_id, drake_id, beyonce_id]:
    print()
    feed = retrieve_page_feed(artist, 5)
    for i, post in enumerate(feed):
        message = get_post_message(post)[:50]
        print('{0} - {1}...'.format(i+1, message))

# ## Example 7.
# Measuring engagement

# In[ ]:

# Field expression requesting the like, share and comment counters of a post
# in a single Graph API call (limit(0) asks for the summary without items).
_RESPONSE_FIELDS = ','.join([
    'likes.limit(0).summary(true)',
    'shares.limit(0).summary(true)',
    'comments.limit(0).summary(true)',
])


def _as_percent_of_fans(counts, total_fans):
    """Express each count as a percentage of ``total_fans``.

    Returns 0.0 for every count when ``total_fans`` is zero, so a page
    without fans does not raise ZeroDivisionError.
    """
    if not total_fans:
        return tuple(0.0 for _ in counts)
    return tuple(count / total_fans * 100.0 for count in counts)


# Measure the response to a post in terms of likes, shares, and comments
def measure_response(post_id):
    """Return the number of likes, shares, and comments on a given post
    as a measure of user engagement.

    All three counters are requested in one Graph API call through the
    module-level connection ``g``.

    Returns:
        A ``(likes, shares, comments)`` tuple of counts.
    """
    post = g.get_object(id=post_id, fields=_RESPONSE_FIELDS)
    likes = post['likes']['summary']['total_count']
    # NOTE(review): the Graph API appears to omit 'shares' entirely for posts
    # that were never shared -- treated as zero here; confirm against the API.
    shares = post.get('shares', {}).get('count', 0)
    comments = post['comments']['summary']['total_count']
    return likes, shares, comments


# Measure the relative share of a page's fans engaging with a post
def measure_engagement(post_id, total_fans):
    """Return likes, shares, and comments on a post as percentages of the
    page's total fan count.

    Args:
        post_id: ID of the post to measure.
        total_fans: Number of fans of the page the post belongs to.

    Returns:
        A ``(likes_pct, shares_pct, comments_pct)`` tuple of floats; all
        0.0 when ``total_fans`` is zero.
    """
    return _as_percent_of_fans(measure_response(post_id), total_fans)


# Retrieve the last 5 items from the artists' feeds, print the
# reaction and the degree of engagement
artist_dict = {'Taylor Swift': taylor_swift_id,
               'Drake': drake_id,
               'Beyoncé': beyonce_id}
for name, page_id in artist_dict.items():
    print()
    print(name)
    print('------------')
    feed = retrieve_page_feed(page_id, 5)
    total_fans = get_total_fans(page_id)

    for i, post in enumerate(feed):
        message = get_post_message(post)[:30]
        post_id = post['id']
        # Fetch the counts once and derive the percentages locally rather
        # than querying the API a second time for the same numbers.
        likes, shares, comments = measure_response(post_id)
        likes_pct, shares_pct, comments_pct = _as_percent_of_fans(
            (likes, shares, comments), total_fans)
        print('{0} - {1}...'.format(i+1, message))
        print(' Likes {0} ({1:7.5f}%)'.format(likes, likes_pct))
        print(' Shares {0} ({1:7.5f}%)'.format(shares, shares_pct))
        print(' Comments {0} ({1:7.5f}%)'.format(comments, comments_pct))

# ## Example 8. Storing data in a pandas DataFrame

# In[ ]:

import pandas as pd  # pip install pandas

# Create a Pandas DataFrame to contain artist page
# feed information
columns = ['Name',
           'Total Fans',
           'Post Number',
           'Post Date',
           'Headline',
           'Likes',
           'Shares',
           'Comments',
           'Rel. Likes',
           'Rel. Shares',
           'Rel. Comments']

# Collect the last 10 posts and their audience reaction for each of the
# artists as plain dicts and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame per row.
rows = []
for page_id in [taylor_swift_id, drake_id, beyonce_id]:
    name = g.get_object(id=page_id)['name']
    fans = get_total_fans(page_id)
    feed = retrieve_page_feed(page_id, 10)
    for i, post in enumerate(feed):
        likes, shares, comments = measure_response(post['id'])
        likes_pct, shares_pct, comments_pct = _as_percent_of_fans(
            (likes, shares, comments), fans)
        rows.append({
            'Name': name,
            'Total Fans': fans,
            'Post Number': i+1,
            'Post Date': post['created_time'],
            'Headline': get_post_message(post),
            'Likes': likes,
            'Shares': shares,
            'Comments': comments,
            'Rel. Likes': likes_pct,
            'Rel. Shares': shares_pct,
            'Rel. Comments': comments_pct,
        })

musicians = pd.DataFrame(rows, columns=columns)

# Fix the dtype of a few columns
for col in ['Post Number', 'Total Fans', 'Likes', 'Shares', 'Comments']:
    musicians[col] = musicians[col].astype(int)

# In[ ]:

# Show a preview of the DataFrame
musicians.head()

# ## Example 9. Visualizing data stored in a pandas DataFrame

# In[ ]:

import matplotlib  # pip install matplotlib
get_ipython().run_line_magic('matplotlib', 'inline')

# Select Drake's posts once and draw one bar chart per absolute metric.
drake_posts = musicians[musicians['Name'] == 'Drake']
for metric in ['Likes', 'Shares', 'Comments']:
    drake_posts.plot(x='Post Number', y=metric, kind='bar')

# In[ ]:

# Same selection, one bar chart per relative (percentage-of-fans) metric.
drake_posts = musicians[musicians['Name'] == 'Drake']
for metric in ['Rel. Likes', 'Rel. Shares', 'Rel. Comments']:
    drake_posts.plot(x='Post Number', y=metric, kind='bar')

# ## Example 10. Comparing different artists to each other

# In[ ]:

# Reset the index to a multi-index
musicians = musicians.set_index(['Name', 'Post Number'])

# In[ ]:

# The unstack method pivots the index labels
# and lets you get data columns grouped by artist
musicians.unstack(level=0)['Likes']

# In[ ]:

# Plot the comparative reactions to each artist's last 10 Facebook posts
plot = musicians.unstack(level=0)['Likes'].plot(
    kind='bar', subplots=False, figsize=(10, 5), width=0.8)
plot.set_xlabel('10 Latest Posts')
plot.set_ylabel('Number of Likes Received')

# In[ ]:

# Plot the engagement of each artist's Facebook fan base to the last 10 posts
plot = musicians.unstack(level=0)['Rel. Likes'].plot(
    kind='bar', subplots=False, figsize=(10, 5), width=0.8)
plot.set_xlabel('10 Latest Posts')
plot.set_ylabel('Likes / Total Fans (%)')

# ## Example 11. Calculate average engagement

# In[ ]:

# Pivot once; each column group then yields per-artist averages.
by_artist = musicians.unstack(level=0)

print('Average Likes / Total Fans')
print(by_artist['Rel. Likes'].mean())

print('\nAverage Shares / Total Fans')
print(by_artist['Rel. Shares'].mean())

print('\nAverage Comments / Total Fans')
print(by_artist['Rel. Comments'].mean())