Using Google Trends API from SerpApi

Using Google Trends API from SerpApi

This blog post is a step-by-step tutorial about scraping Google Trends using SerpApi.

ยท

9 min read

What will be scraped

wwlb-google-trends

Why using API?

  • No need to create a parser from scratch and maintain it.
  • Bypass blocks from Google: solve CAPTCHA or solve IP blocks.
  • Pay for proxies, and CAPTCHA solvers.
  • Don't need to use browser automation.

SerpApi handles everything on the backend with fast response times under ~2.5 seconds (~1.2 seconds with Ludicrous speed) per request and without browser automation, which becomes much faster. Response times and status rates are shown under SerpApi Status page.

serpapi-status-page

Full Code

If you don't need explanation, have a look at full code example in the online IDE.

from serpapi import GoogleSearch
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os, json

params = {
    # https://docs.python.org/3/library/os.html#os.getenv
    'api_key': os.getenv('API_KEY'),        # your serpapi api
    'engine': 'google_trends',                # SerpApi search engine    
    'date': 'today 12-m',                    # by default Past 12 months
    'cat': 0,                                # by default All categories
    # 'geo': '',                            # by default Worldwide
    # 'gprop': 'images',                    # by default Web Search
    # 'data_type': '',                        # type of search (defined in the function)
    # 'q': '',                                # query (defined in the function)
}


def scrape_google_trends(data_type: str, key: str, query: str):
    params['data_type'] = data_type
    params['q'] = query

    search = GoogleSearch(params)           # where data extraction happens on the SerpApi backend
    results = search.get_dict()             # JSON -> Python dict

    return results[key] if not results['error'] else results['error']


def get_all_data():
    data = {
        'interest_over_time': {},
        'compared_breakdown_by_region': [],
        'interest_by_region': [],
        'related_topics': {},
        'related_queries': {}
    }

    interest_over_time = scrape_google_trends('TIMESERIES', 'interest_over_time', 'Mercedes,BMW,Audi')
    data['interest_over_time'] = interest_over_time

    compared_breakdown_by_region = scrape_google_trends('GEO_MAP', 'compared_breakdown_by_region', 'Mercedes,BMW,Audi')
    data['compared_breakdown_by_region'] = compared_breakdown_by_region

    interest_by_region = scrape_google_trends('GEO_MAP_0', 'interest_by_region', 'Mercedes')
    data['interest_by_region'] = interest_by_region

    related_topics = scrape_google_trends('RELATED_TOPICS', 'related_topics', 'Mercedes')
    data['related_topics'] = related_topics

    related_queries = scrape_google_trends('RELATED_QUERIES', 'related_queries', 'Mercedes')
    data['related_queries'] = related_queries

    return data


def plot_interest_over_time(data: dict):
    timeseries = []

    # Extracting data
    for result in data['interest_over_time']['timeline_data']:
        for value in result['values']:
            query = value['query']
            extracted_value = value['extracted_value']

            timeseries.append({
                'timestamp': result['timestamp'],
                'query': query,
                'extracted_value': extracted_value,
            })

    # Plotting data
    df = pd.DataFrame(data=timeseries)

    sns.set(rc={'figure.figsize': (13, 5)})
    palette = sns.color_palette('mako_r', 3)  # 3 is number of colors

    ax = sns.lineplot(
        data=df,
        x='timestamp',
        y='extracted_value',
        hue='query',
        palette=palette,
    )

    plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left', borderaxespad=0)
    ax.set(xticklabels=[])
    ax.set(xlabel='')

    plt.show()


if __name__ == "__main__":
    google_trends_result = get_all_data()
    print(json.dumps(google_trends_result, indent=2, ensure_ascii=False))
    plot_interest_over_time(google_trends_result)

Preparation

Install library:

pip install google-search-results matplotlib pandas seaborn

google-search-results is a SerpApi API package.

Code Explanation

Import libraries:

from serpapi import GoogleSearch
import os, json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
LibraryPurpose
GoogleSearchto scrape and parse Google results using SerpApi web scraping library.
osto return environment variable (SerpApi API key) value.
jsonto convert extracted data to a JSON object.
matplotlib.pyplotto provide a state-based interface to matplotlib. It provides an implicit, MATLAB-like, way of plotting. It also opens figures on your screen and acts as the figure GUI manager.
pandasto provide high-performance, easy-to-use data structures and data analysis tools.
seabornto provide a high-level interface for drawing attractive and informative statistical graphics.

Next, we write a parameters for making a request:

params = {
    # https://docs.python.org/3/library/os.html#os.getenv
    'api_key': os.getenv('API_KEY'),        # your serpapi api
    'engine': 'google_trends',                # SerpApi search engine    
    'date': 'today 12-m',                    # by default Past 12 months
    'cat': 0,                                # by default All categories
    # 'geo': '',                            # by default Worldwide
    # 'gprop': 'images',                    # by default Web Search
    # 'data_type': '',                        # type of search (defined in the function)
    # 'q': '',                                # query (defined in the function)
}
ParametersExplanation
api_keyParameter defines the SerpApi private key to use.
engineSet parameter to google_trends to use the Google Trends API engine.
dateParameter is used to define a date (e.g. today 12-m - Past 12 months).
catParameter is used to define a search category. The default value is set to 0 ("All categories"). Head to the Google Trends Categories for a full list of supported Google Trends Categories.
geoParameter defines the location from where you want the search to originate. It defaults to Worldwide (activated when the value of geo parameter is not set or empty). Head to the Google Trends Locations for a full list of supported Google Trends locations.
gpropParameter is used for sorting results by property. The default property is set to Web Search (activated when the value of gprop parameter is not set or empty).
data_typeParameter defines the type of search you want to do.
qParameter defines the query or queries you want to search.

This function returns part of the requested data. It also takes the following 3 parameters:

  • data_type - the value of the corresponding parameter, which defines the type of search.
  • key - the key by which the required data is found.
  • query - the value of anything that you would use in a regular Google Trends search.

The parameters for the current request are determined:

params['data_type'] = data_type
params['q'] = query

We then create a search object where the data is retrieved from the SerpApi backend. In the result_dict dictionary we get data from JSON:

search = GoogleSearch(params)   # where data extraction happens on the SerpApi backend
results = search.get_dict()     # JSON -> Python dict

At the end of the function, you need to return the data that is located by the passed key, if any:

return results[key] if not results['error'] else results['error']

The complete function to scrape Google Trends would look like this:

def scrape_google_trends(data_type: str, key: str, query: str):
    params['data_type'] = data_type
    params['q'] = query

    search = GoogleSearch(params)   # where data extraction happens on the SerpApi backend
    results = search.get_dict()     # JSON -> Python dict

    return results[key] if 'error' not in results else results['error']

Get all data

This function returns a list with all data.

At the beginning of the function, you must declare the structure of the data list, which will store all the data:

data = {
    'interest_over_time': {},
    'compared_breakdown_by_region': [],
    'interest_by_region': [],
    'related_topics': {},
    'related_queries': {}
}

For each chart, you need to get a piece of data by calling the scrape_google_trends(<data_type>, <key>, <query>) function with the appropriate parameters. Then all this data is added to the data list:

interest_over_time = scrape_google_trends('TIMESERIES', 'interest_over_time', 'Mercedes,BMW,Audi')
data['interest_over_time'] = interest_over_time

compared_breakdown_by_region = scrape_google_trends('GEO_MAP', 'compared_breakdown_by_region', 'Mercedes,BMW,Audi')
data['compared_breakdown_by_region'] = compared_breakdown_by_region

interest_by_region = scrape_google_trends('GEO_MAP_0', 'interest_by_region', 'Mercedes')
data['interest_by_region'] = interest_by_region

related_topics = scrape_google_trends('RELATED_TOPICS', 'related_topics', 'Mercedes')
data['related_topics'] = related_topics

related_queries = scrape_google_trends('RELATED_QUERIES', 'related_queries', 'Mercedes')
data['related_queries'] = related_queries
Chartdata_typekeyquery
Interest over time (default)TIMESERIESinterest_over_timeAccepts both single and multiple queries per search.
Compared breakdown by regionGEO_MAPcompared_breakdown_by_regionAccepts only multiple queries per search.
Interest by regionGEO_MAP_0interest_by_regionAccepts only single query per search.
Related topicsRELATED_TOPICSrelated_topicsAccepts only single query per search.
Related queriesRELATED_QUERIESrelated_queriesAccepts only single query per search.

The maximum number of queries per search is 5 (this only applies to "Interest over time" and "Compared breakdown by region" data_type, other types of data will only accept 1 query per search).

When passing multiple queries you need to use a comma (,) to separate them (e.g. Mercedes,BMW,Audi).

At the end of the function, you need to return data:

return data

The complete function to get all data would look like this:

def get_all_data():
    data = {
        'interest_over_time': {},
        'compared_breakdown_by_region': [],
        'interest_by_region': [],
        'related_topics': {},
        'related_queries': {}
    }

    interest_over_time = scrape_google_trends('TIMESERIES', 'interest_over_time', 'Mercedes,BMW,Audi')
    data['interest_over_time'] = interest_over_time

    compared_breakdown_by_region = scrape_google_trends('GEO_MAP', 'compared_breakdown_by_region', 'Mercedes,BMW,Audi')
    data['compared_breakdown_by_region'] = compared_breakdown_by_region

    interest_by_region = scrape_google_trends('GEO_MAP_0', 'interest_by_region', 'Mercedes')
    data['interest_by_region'] = interest_by_region

    related_topics = scrape_google_trends('RELATED_TOPICS', 'related_topics', 'Mercedes')
    data['related_topics'] = related_topics

    related_queries = scrape_google_trends('RELATED_QUERIES', 'related_queries', 'Mercedes')
    data['related_queries'] = related_queries

    return data

Plot interest over time

This function takes a dictionary with the retrieved data and returns a graph based on the Interest over time chart.

At the beginning of the function, you must declare a timeseries list and fill it with the necessary data for plotting:

timeseries = []

# Extracting data
for result in data['interest_over_time']['timeline_data']:
    for value in result['values']:
        query = value['query']
        extracted_value = value['extracted_value']

        timeseries.append({
            'timestamp': result['timestamp'],
            'query': query,
            'extracted_value': extracted_value,
        })

The next step is to plot the graph:

# Plotting data
df = pd.DataFrame(data=timeseries)

sns.set(rc={'figure.figsize': (13, 5)})
palette = sns.color_palette('mako_r', 3)  # 3 is number of colors

ax = sns.lineplot(
    data=df,
    x='timestamp',
    y='extracted_value',
    hue='query',
    palette=palette,
)

plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left', borderaxespad=0)
ax.set(xticklabels=[])
ax.set(xlabel='')

plt.show()

The complete function to show all interest over time would look like this:

def plot_interest_over_time(data: dict):
    timeseries = []

    # Extracting data
    for result in data['interest_over_time']['timeline_data']:
        for value in result['values']:
            query = value['query']
            extracted_value = value['extracted_value']

            timeseries.append({
                'timestamp': result['timestamp'],
                'query': query,
                'extracted_value': extracted_value,
            })

    # Plotting data
    df = pd.DataFrame(data=timeseries)

    sns.set(rc={'figure.figsize': (13, 5)})
    palette = sns.color_palette('mako_r', 3)  # 3 is number of colors

    ax = sns.lineplot(
        data=df,
        x='timestamp',
        y='extracted_value',
        hue='query',
        palette=palette,
    )

    plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left', borderaxespad=0)
    ax.set(xticklabels=[])
    ax.set(xlabel='')

    plt.show()

To demonstrate how the function works, the following parameters were passed and the corresponding data was generated:

params = {
    'date': 'all',                    # 2004 - present
    'q': 'london,new york,brazil',    # query
}

The result of the function is below:

Google Trends
image
Resulted graph
image

Top-level code environment

The last step would be to add a Python idiom to make sure that readers understand this code as an executable script. Inside we call the get_all_data() function and print the received data:

if __name__ == "__main__":
    google_trends_result = get_all_data()
    print(json.dumps(google_trends_result, indent=2, ensure_ascii=False))
    # plot_interest_over_time(google_trends_result)

Output

{
  "interest_over_time": {
    "timeline_data": [
      {
        "date": "Oct 24 โ€“ 30, 2021",
        "values": [
          {
            "query": "Mercedes",
            "value": "56",
            "extracted_value": 56
          },
          {
            "query": "BMW",
            "value": "80",
            "extracted_value": 80
          },
          {
            "query": "Audi",
            "value": "43",
            "extracted_value": 43
          }
        ]
      },
      ... other results
      {
        "date": "Oct 16 โ€“ 22, 2022",
        "values": [
          {
            "query": "Mercedes",
            "value": "52",
            "extracted_value": 52
          },
          {
            "query": "BMW",
            "value": "75",
            "extracted_value": 75
          },
          {
            "query": "Audi",
            "value": "40",
            "extracted_value": 40
          }
        ]
      }
    ],
    "averages": [
      {
        "query": "Mercedes",
        "value": 55
      },
      {
        "query": "BMW",
        "value": 82
      },
      {
        "query": "Audi",
        "value": 43
      }
    ]
  },
  "compared_breakdown_by_region": "Google Trends hasn't returned any results for this query.",
  "interest_by_region": [
    {
      "geo": "AL",
      "location": "Albania",
      "max_value_index": 0,
      "value": "100",
      "extracted_value": 100
    },
    ... other results
    {
      "geo": "TH",
      "location": "Thailand",
      "max_value_index": 0,
      "value": "4",
      "extracted_value": 4
    }
  ],
  "related_topics": "Google Trends hasn't returned any results for this query.",
  "related_queries": "Google Trends hasn't returned any results for this query."
}

Join us on Twitter | YouTube

Add a Feature Request๐Ÿ’ซ or a Bug๐Ÿž

ย