Download Jupyter notebook here
In this document, we will review four of the AYLIEN News API's most commonly used endpoints:
We will utilise AYLIEN's Python SDK (Software Development Kit) and also show you some helpful code to start wrangling the data in Python using Pandas and visualizing it using Plotly.
As an exercise, we will focus on pulling news stories related to Citibank, to show how these different endpoints can be used in combination to investigate a topic of your choice.
Please note, comprehensive documentation on how to use the News API can be found here.
Here we will outline how to connect to AYLIEN's News API and define some useful functions to make pulling and analysing our data easier.
First things first — we need to connect to the News API. Make sure that you have installed the aylien_news_api library using pip. The code below demonstrates how to connect to the API and also imports some other libraries that will be useful later.
Don't forget to enter your API credentials in order to connect to the API! If you don't have any credentials yet, you can sign up for a free trial here.
from __future__ import print_function
# install packages if not installed already
# !pip install datetime
# !pip install pandas
# !pip install numpy
# !pip install plotly
# !pip install aylien_news_api
# !pip install chart_studio
# !pip install tqdm
# !pip install pprint
# !pip install wordcloud
import os
import requests
import datetime
from dateutil.tz import tzutc
import json
import time
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
from pprint import pprint
# for visualization
import plotly.graph_objs as go
import chart_studio.plotly as py
from plotly.subplots import make_subplots
headers = {
'X-AYLIEN-NewsAPI-Application-ID': 'ID',
'X-AYLIEN-NewsAPI-Application-Key': 'KEY'
}
print('Complete')
Complete
The Functions below will be used to pull the data from the API using get requests. In some cases, data will be returned as an array of objects e.g. the get_stories function. In others data will be returned as Pandas dataframes e.g. the get_timeseires function.
#=======================================================================================
def get_timeseries(params, print_params = None, print_count = None):
if print_params is None or print_params == 'yes':
pprint(params)
response = requests.get('https://api.aylien.com/news/time_series', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
#convert to dataframe
timeseries_data = pd.DataFrame(response['time_series'])
# convert back to datetime
timeseries_data['published_at'] = pd.to_datetime(timeseries_data['published_at'])
timeseries_data['published_at'] = timeseries_data['published_at'].dt.date
if print_count is None or print_count == 'yes':
print('Number of stories returned : ' + str(format(timeseries_data['count'].sum(), ",d")))
return timeseries_data
#=======================================================================================
def get_stories(params, print_params = None, print_count = None, print_story = None):
if print_params is None or print_params == 'yes':
pprint(params)
fetched_stories = []
stories = None
while stories is None or len(stories) > 0:
try:
response = requests.get('https://api.aylien.com/news/stories', params=params, headers=headers).json()
except Exception as e:
continue
if 'errors' in response or 'error' in response:
pprint(response)
stories = response['stories']
if len(stories) > 0:
print(stories[0]['title'])
print(stories[0]['links']['permalink'])
params['cursor'] = response['next_page_cursor']
fetched_stories += stories
if (print_story is None or print_story == 'yes') and len(stories) > 0:
pprint(stories[0]['title'])
if print_count is None or print_count == 'yes':
print("Fetched %d stories. Total story count so far: %d" %(len(stories), len(fetched_stories)))
return fetched_stories
#=======================================================================================
def get_top_ranked_stories(params, no_stories, print_params = None, print_count = None):
if print_params is None or print_params == 'yes':
pprint(params)
fetched_stories = []
stories = None
while stories is None or len(stories) > 0 and len(fetched_stories) < no_stories:
try:
response = requests.get('https://api.aylien.com/news/stories', params=params, headers=headers).json()
except Exception as e:
continue
if 'errors' in response or 'error' in response:
pprint(response)
stories = response['stories']
if len(stories) > 0:
print(stories[0]['title'])
print(stories[0]['links']['permalink'])
params['cursor'] = response['next_page_cursor']
fetched_stories += stories
if print_count is None or print_count == 'yes':
print("Fetched %d stories. Total story count so far: %d" %(len(stories), len(fetched_stories)))
return fetched_stories
#=======================================================================================
def get_clusters(params={}):
#pprint(params)
response = requests.get('https://api.aylien.com/news/clusters', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
return response
#=======================================================================================
# pull trends data to identify most frequently occuring entities or keywords
def get_trends(params={}):
#pprint(params)
response = requests.get('https://api.aylien.com/news/trends', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
return response
#=======================================================================================
def get_cluster_from_trends(params, print_params = None):
if print_params is None or print_params == 'yes':
pprint(params)
"""
Returns a list of up to 100 clusters that meet the parameters set out.
"""
response = requests.get('https://api.aylien.com/news/trends', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
if len(response) > 0:
return response["trends"]
#=======================================================================================
# identify the top ranked story per cluster
def get_top_stories_in_cluster(cluster_id, no_stories):
top_story_params = {
'clusters[]' : [cluster_id]
, 'sort_by' : "source.rankings.alexa.rank"
, 'per_page' : no_stories
, 'return[]' : ['id', 'language', 'links', 'title', 'source', 'translations', 'clusters', 'published_at']
}
response = requests.get('https://api.aylien.com/news/stories', params=top_story_params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
if len(response["stories"]) > 0:
return response["stories"]
else:
return None
#=======================================================================================
# helper endpoint that takes a string of characters and an entity type (such as sources, or DBpedia entities) and returns matching entities of the specified type along with additional metadata
# params = {'type' : 'source_names', 'term' : 'Times of India' }
def autocompletes(params={}):
pprint(params)
"""
Returns a list of up to 100 clusters that meet the parameters set out.
"""
response = requests.get('https://api.aylien.com/news/autocompletes', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
pprint(response)
These other functions will help us format data as necessary.
# return transalted title or body of a story (specify in params)
def return_translated_content(story_x, text_x):
if 'translations' in story_x:
return story_x['translations']['en'][text_x]
else:
return story_x[text_x]
# create smaller lists from big lists
def chunks(lst, n):
return list(lst[i:i + n] for i in range(0, len(lst), n))
#=======================================================================================
# split title string over multiple lines for legibility on graph
def split_title_string(dataframe_x, column_x):
title_strings = []
for index, row in dataframe_x.iterrows():
word_array = row[column_x].split()
counter = 0
string = ''
for word in word_array:
if counter == 7:
string += (word + '<br>')
counter = 0
else:
string += (word + ' ')
counter += 1
title_strings.append(string)
dataframe_x[column_x + '_string'] = (title_strings)
#=======================================================================================
def print_keyword_mention(story_x, element_x, keyword_x):
body_x = story[element_x]
if 'translations' in story and story['translations'] is not None and 'en' in story['translations']:
body_x = story['translations']['en'][element_x]
# extract a window around key entity
e_idx = body_x.find(keyword_x)
e_end = e_idx + len(keyword_x)
if e_idx >= 0:
e_str = body_x[e_idx-100:e_idx] + "\033[1m" + body_x[e_idx:e_end] + "\033[0m " + body_x[e_end+1:e_end+51]
print(f'{e_str}')
elif element_x == 'title':
print(story['title'])
#=======================================================================================
def print_entities(story_x, element_x = None, surface_form_x = None, version_x = None):
element = ''
if element_x is None or element_x == 'body':
element = 'body'
else:
element = 'title'
# if no surface_form
if surface_form_x is None:
for entity in story_x['entities']:
pprint(entity)
else:
for entity in story_x['entities']:
x = 0
for surface_form in entity[element_x]['surface_forms']:
if surface_form_x.lower() in surface_form['text'].lower():
x = 1
if x != 0:
pprint(entity)
The most granular data point we can extract from the News API is a story; all other endpoints are aggregations or extrapolations of stories. Stories are basically news articles that have been enriched using AYLIEN's machine learning prcoess. We will learn more about this enrichment later.
For now we will pull one story published in English in the last hour.
# define parameters
params = {
'published_at.start': 'NOW-1HOUR'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'per_page' : 1
}
stories = get_top_ranked_stories(params, 1)
print()
pprint(stories)
{'language[]': ['en'], 'per_page': 1, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1HOUR'} USA TODAY Sports Josh McDaniels on Raiders' Resiliency in OT Win Over Seattle Originally posted on FanNation Raider Maven By Aidan Champion | Last updated 11/28/22 https://www.yardbarker.com/nfl/articles/josh_mcdaniels_on_raiders_resiliency_in_ot_win_over_seattle/s1_16640_38177445 Fetched 1 stories. Total story count so far: 1 [{'author': {'id': 27406279, 'name': 'Aidan Champion'}, 'body': 'For the second week in a row, the Las Vegas Raiders found the will ' 'to win in overtime on the road.\n' ' A week after defeating the Denver Broncos on a walk-off play in ' 'OT, the Raiders did the same against a solid Seattle Seahawks ' 'team. "I think our team is obviously learning how to be ' 'resilient," Raiders coach Josh McDaniels said in his postgame press ' 'conference Sunday. "And give Seattle a lot of credit. This is a ' "good football team, they're well coached like we thought they would " 'be. Pete [Carroll] does a great job, and they gave us some fits on ' 'some things and made some adjustments and we had to make some ' 'adjustments and it was a very interesting game in that regard. But ' 'I thought our guys were tough." Sunday\'s game was a sequence of ' 'ups and downs, with the Raiders even falling behind by a touchdown ' 'with just over 5 and half minutes remaining in regulation. "You ' 'got to focus on the next drive, the next sequence, the next group ' 'that\'s going to go out there on the field," McDaniels said. "I ' "mean, it started from the first play to the last play. First play's " "an interception and the last play's a touchdown. There was a lot of " 'swings, and I credit our coaches. Our coaches did a really good job ' 'of staying neutral at times when they needed to be and trying to ' 'fix the problems if there were any and address those without having ' 'a bunch of emotion in it." As promising as the back-to-back ' 'victories have been for the Silver and Black, McDaniels has always ' 'felt optimistic his team was heading in the right direction. ' '"I\'ve never doubted that it was," he said. "And like I said, the ' "NFL, there's a lot of close games every week, and sometimes it " 'takes a little while to learn how to get over the hump on some of ' "those things, and that's what we attribute it to. Doesn't guarantee " "us anything going forward. We're going to stick with our process, " 'we think we have a really close-knit group here that works hard, we ' "believe in what we're doing, we believe in what we're coaching, we " "believe in trying to win the way we're trying to win. And I think " 'our guys do, too." This article first appeared on FanNation Raider ' 'Maven and was syndicated with permission. More must-reads:', 'categories': [{'id': 'IAB17', 'label': 'Sports', 'links': {'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17'}, 'score': 0.33, 'taxonomy': 'iab-qag'}, {'id': 'IAB17-12', 'label': 'Football', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17-12'}, 'score': 0.24, 'taxonomy': 'iab-qag'}, {'id': '15003000', 'label': 'American football', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/iptc-subjectcode/15000000'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iptc-subjectcode/15003000'}, 'score': 0.54, 'taxonomy': 'iptc-subjectcode'}, {'id': 'ay.lifesoc.prosport', 'label': 'Professional Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.prosport'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports', 'label': 'Sports', 'links': {'parents': [], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.football', 'label': 'Football (American)', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.team'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.football'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.nfl', 'label': 'National Football League', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.prosport', 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.football'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.nfl'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.team', 'label': 'Team Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.team'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.lifesoc.gensport', 'label': 'General Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.gensport'}, 'score': 0.9, 'taxonomy': 'aylien'}], 'characters_count': 2187, 'clusters': [409119966], 'entities': [{'body': {'sentiment': {'confidence': 0.59, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 387, 'start': 380}, 'sentiment': {'confidence': 0.59, 'polarity': 'neutral'}}], 'text': 'Seattle'}]}, 'external_ids': {}, 'id': 'Q5083', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q5083', 'wikipedia': 'https://en.wikipedia.org/wiki/Seattle'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.74, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 80, 'start': 73}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Seattle'}]}, 'types': ['Local_government', 'Corporation', 'Location', 'Political_organisation', 'City', 'Government', 'Community', 'Company', 'Organization']}, {'body': {'sentiment': {'confidence': 0.74, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 323, 'start': 309}, 'sentiment': {'confidence': 0.74, 'polarity': 'positive'}}], 'text': 'Josh McDaniels'}]}, 'external_ids': {}, 'id': 'Q3810320', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q3810320', 'wikipedia': 'https://en.wikipedia.org/wiki/Josh_McDaniels'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 34, 'start': 20}, 'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}}], 'text': 'Josh McDaniels'}]}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 3, 'mentions': [{'index': {'end': 179, 'start': 172}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}, {'index': {'end': 302, 'start': 295}, 'sentiment': {'confidence': 0.67, 'polarity': 'positive'}}, {'index': {'end': 775, 'start': 768}, 'sentiment': {'confidence': 0.59, 'polarity': 'negative'}}], 'text': 'Raiders'}]}, 'external_ids': {}, 'id': 'Q5870124', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q5870124', 'wikipedia': 'https://en.wikipedia.org/wiki/History_of_the_Oakland_Raiders'}, 'overall_frequency': 4, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.82, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.91, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 45, 'start': 38}, 'sentiment': {'confidence': 0.91, 'polarity': 'neutral'}}], 'text': 'Raiders'}]}, 'types': []}, {'body': {'sentiment': {'confidence': 0.79, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 2134, 'start': 2112}, 'sentiment': {'confidence': 0.79, 'polarity': 'neutral'}}], 'text': 'FanNation Raider Maven'}]}, 'external_ids': {}, 'id': 'N186086181726508417844685281276398801348', 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.85, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.9, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 132, 'start': 110}, 'sentiment': {'confidence': 0.9, 'polarity': 'neutral'}}], 'text': 'FanNation Raider ' 'Maven'}]}, 'types': ['Location']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N130591893304718568511464573285032572817', 'overall_frequency': 1, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 16, 'start': 0}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'USA TODAY Sports'}]}, 'types': ['Organization']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N223547351047399335585713261784924445595', 'overall_frequency': 1, 'overall_prominence': 0.91, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 72, 'start': 64}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Win Over'}]}, 'types': ['Location']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'Q132148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q132148', 'wikipedia': 'https://en.wikipedia.org/wiki/Aidan_of_Lindisfarne'}, 'overall_frequency': 1, 'overall_prominence': 0.8, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 144, 'start': 139}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Aidan'}]}, 'types': ['Human']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N283599043316305970941549218810195124075', 'overall_frequency': 1, 'overall_prominence': 0.76, 'overall_sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 174, 'start': 170}, 'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}}], 'text': 'Last'}]}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 51, 'start': 34}, 'sentiment': {'confidence': 0.72, 'polarity': 'positive'}}], 'text': 'Las Vegas Raiders'}]}, 'external_ids': {}, 'id': 'Q324523', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q324523', 'wikipedia': 'https://en.wikipedia.org/wiki/Las_Vegas_Raiders'}, 'overall_frequency': 1, 'overall_prominence': 0.68, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 141, 'start': 127}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}], 'text': 'Denver Broncos'}]}, 'external_ids': {}, 'id': 'Q223507', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q223507', 'wikipedia': 'https://en.wikipedia.org/wiki/Denver_Broncos'}, 'overall_frequency': 1, 'overall_prominence': 0.55, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.75, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 166, 'start': 164}, 'sentiment': {'confidence': 0.75, 'polarity': 'neutral'}}], 'text': 'OT'}]}, 'external_ids': {}, 'id': 'Q186982', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q186982', 'wikipedia': 'https://en.wikipedia.org/wiki/Overtime_(sports)'}, 'overall_frequency': 1, 'overall_prominence': 0.49, 'overall_sentiment': {'confidence': 0.75, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': []}, {'body': {'sentiment': {'confidence': 0.58, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 225, 'start': 209}, 'sentiment': {'confidence': 0.58, 'polarity': 'neutral'}}], 'text': 'Seattle Seahawks'}]}, 'external_ids': {}, 'id': 'Q221878', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q221878', 'wikipedia': 'https://en.wikipedia.org/wiki/Seattle_Seahawks'}, 'overall_frequency': 1, 'overall_prominence': 0.43, 'overall_sentiment': {'confidence': 0.58, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.91, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 2, 'mentions': [{'index': {'end': 995, 'start': 986}, 'sentiment': {'confidence': 0.83, 'polarity': 'neutral'}}, {'index': {'end': 1458, 'start': 1449}, 'sentiment': {'confidence': 0.91, 'polarity': 'positive'}}], 'text': 'McDaniels'}]}, 'external_ids': {}, 'id': 'Q16846249', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q16846249', 'wikipedia': 'https://en.wikipedia.org/wiki/K._J._McDaniels'}, 'overall_frequency': 2, 'overall_prominence': 0.34, 'overall_sentiment': {'confidence': 0.91, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.97, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 491, 'start': 487}, 'sentiment': {'confidence': 0.97, 'polarity': 'positive'}}], 'text': 'Pete'}]}, 'external_ids': {}, 'id': 'N334899751825691118615243936337416130988', 'overall_frequency': 1, 'overall_prominence': 0.03, 'overall_sentiment': {'confidence': 0.97, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.9, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 1447, 'start': 1442}, 'sentiment': {'confidence': 0.9, 'polarity': 'positive'}}], 'text': 'Black'}]}, 'external_ids': {}, 'id': 'N309782724290245396668082628620805636318', 'overall_frequency': 1, 'overall_prominence': 0.02, 'overall_sentiment': {'confidence': 0.9, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 1600, 'start': 1597}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'NFL'}]}, 'external_ids': {}, 'id': 'Q1215884', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q1215884', 'wikipedia': 'https://en.wikipedia.org/wiki/National_Football_League'}, 'overall_frequency': 1, 'overall_prominence': 0.02, 'overall_sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']}], 'hashtags': ['#Seattle', '#JoshMcDaniels', '#WalkoffHomeRun', '#USAToday', '#Touchdown', '#SeattleSeahawks', '#Overtime', '#OaklandRaiders', '#NationalFootballLeague', '#NFL', '#MavenHuffman', '#Interception', '#Emotion', '#DenverBroncos', '#BroadcastSyndication', '#AssociationFootball'], 'id': 5079455953, 'industries': [], 'keywords': ['things', 'OT', 'Seattle Seahawks', 'week', 'Aidan Champion', 'Seattle', 'Maven', 'team', 'play', 'emotion', 'touchdown', 'Josh McDaniels', 'Win Over', 'overtime', 'Black', 'interception', 'walk-off', 'Pete', 'syndicated', 'McDaniels', 'Last', 'Raiders', 'USA TODAY', 'Denver Broncos', 'NFL', 'football', 'Sunday'], 'language': 'en', 'license_type': 0, 'links': {'clusters': '/stories?clusters[]=409119966', 'permalink': 'https://www.yardbarker.com/nfl/articles/josh_mcdaniels_on_raiders_resiliency_in_ot_win_over_seattle/s1_16640_38177445', 'related_stories': '/related_stories?story_id=5079455953'}, 'media': [{'format': 'JPEG', 'height': 900, 'type': 'image', 'url': 'https://www.yardbarker.com/media/7/6/76d5488c5ae0f0dd9b411351871bdc7a7b623a6b/thumb_16x9/usatsi_19517121_168390101_lowres.jpg?v=1', 'width': 1600}], 'paragraphs_count': 2, 'published_at': '2022-11-28T16:58:47Z', 'sentences_count': 22, 'sentiment': {'body': {'polarity': 'positive', 'score': 0.6}, 'title': {'polarity': 'neutral', 'score': 0.75}}, 'source': {'domain': 'yardbarker.com', 'home_page_url': 'https://www.yardbarker.com/', 'id': 117069, 'locations': [{'country': 'US'}], 'logo_url': '', 'name': 'Yardbarker', 'scopes': []}, 'summary': {'sentences': ['For the second week in a row, the Las Vegas ' 'Raiders found the will to win in overtime on the ' 'road.\n' ' ', 'A week after defeating the Denver Broncos on a ' 'walk-off play in OT, the Raiders did the same ' 'against a solid Seattle Seahawks team. ', '"I think our team is obviously learning how to be ' 'resilient," Raiders coach Josh McDaniels said in ' 'his postgame press conference Sunday.', '"And like I said, the NFL, there\'s a lot of ' 'close games every week, and sometimes it takes a ' 'little while to learn how to get over the hump on ' "some of those things, and that's what we " 'attribute it to.', "Sunday's game was a sequence of ups and downs, " 'with the Raiders even falling behind by a ' 'touchdown with just over 5 and half minutes ' 'remaining in regulation. ']}, 'title': "USA TODAY Sports Josh McDaniels on Raiders' Resiliency in OT " 'Win Over Seattle Originally posted on FanNation Raider ' 'Maven By Aidan Champion \t\t\t\t\t\t\xa0|\xa0 \t\t\t\t\tLast ' 'updated 11/28/22', 'words_count': 420}]
We can see that the story output is a list with one dictionary object representing the story we queried. The story object inlcudes the title, body text, summary sentences and lots of other contextual information that has been made available via AYLIEN's enrichment process.
We can loop through the object's key names to give us a flavour of what is available.
for key in stories[0]:
print(key)
author body categories industries characters_count clusters entities hashtags id keywords language links media paragraphs_count published_at sentences_count sentiment source summary title words_count license_type
Using a keyword search, we can search the AYLIEN database for words that appear in the title or body of an article. Here we will search for "Citigroup" in the title.
We will also limit the the date range — if we don't, we could return thousands of stories that feature "Citigroup" in the title — and define the language as English ("en"). Defining the language not only limits our output to English language content, it also allows the query to to remove any relevant stopwords. Learn about stopwords here.
We will also introduce the cursor. We don't know how many stories we'll get, and the cursor will allow us to scan through results. Learn more about using the cursor here.
The per_page parameter defines how many stories are returned for each API call, with 100 being the max.
The default parameters below will use relative times to ensure you can access recent news data (historical data is restricted). You can try changing the time periods by altering the paramters using the following formats:
# define the query parameters
params = {
'language[]': ['en'],
'title': 'Citigroup',
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
stories = get_stories(params)
print('************')
print("Fetched %s stories" %(len(stories)))
{'cursor': '*', 'language[]': ['en'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'title': 'Citigroup'} BUZZ-Live Nation rises as Citigroup lifts rating to 'buy' https://www.swissquote.ch/sqi_premium/market/news/News.action?id=14986092 "BUZZ-Live Nation rises as Citigroup lifts rating to 'buy'" Fetched 50 stories. Total story count so far: 50 Caribou Biosciences (NASDAQ:CRBU) Given New $37.00 Price Target at Citigroup https://dakotafinancialnews.com/2022/11/27/caribou-biosciences-nasdaqcrbu-given-new-37-00-price-target-at-citigroup.html 'Caribou Biosciences (NASDAQ:CRBU) Given New $37.00 Price Target at Citigroup' Fetched 50 stories. Total story count so far: 100 Northern Oil and Gas (NYSE:NOG) PT Raised to $46.00 at Citigroup https://baseballnewssource.com/2022/11/27/northern-oil-and-gas-nysenog-pt-raised-to-46-00-at-citigroup/7852403.html 'Northern Oil and Gas (NYSE:NOG) PT Raised to $46.00 at Citigroup' Fetched 7 stories. Total story count so far: 107 Fetched 0 stories. Total story count so far: 107 ************ Fetched 107 stories
Depending on what parameters you used (and of course, how much Citgroup featured in the news), your number of stories may vary. Let's print the first 10 titles to get a feel for the stories we have pulled.
for story in stories[0:10]:
print(story['id'])
print(story['title'])
print('')
5079435568 BUZZ-Live Nation rises as Citigroup lifts rating to 'buy' 5079419698 Citigroup Inc. (NYSE: C) Is Rated A Buy By Analysts. 5079358521 Ensign Peak Advisors Inc Has $57.29 Million Stock Holdings in Citigroup Inc. (NYSE:C) 5079233045 Citigroup Trims Galera Therapeutics (NASDAQ:GRTX) Target Price to $18.00 5079229312 Citigroup Upgrades Live Nation Entertainment (NYSE:LYV) to “Buy” 5079207000 Citigroup Upgrades Live Nation Entertainment (NYSE:LYV) to “Buy” 5079202754 MeridianLink (NYSE:MLNK) Price Target Lowered to $16.00 at Citigroup 5079174029 NuCana (NASDAQ:NCNA) PT Lowered to $2.00 at Citigroup 5079172560 Citigroup Raises Five Below (NASDAQ:FIVE) Price Target to $186.00 5079171435 MeridianLink (NYSE:MLNK) Price Target Lowered to $16.00 at Citigroup
What if we want to refine our keyword search further? We can create more complicated searches using Boolean statements. For instance, if we were interested in searching for news that mentioned Citigroup or Bank of America and that also mentioned "shares" but not "sell", we could write the following query. It is important to note here that the "Bank of America" search term is wrapped in double quotes — if it wasn't, each individual word would be treated as an indivudal search term, but we want to search for the full phrase.
# define the query parameters
params = {
'language[]': ['en'],
'title': '("Citigroup" OR "Bank of America" ) AND "shares" NOT "sell"',
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
stories = get_stories(params)
print('************')
print("Fetched %s stories" %(len(stories)))
print('************')
for story in stories:
print(story['title'])
print('')
{'cursor': '*', 'language[]': ['en'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'title': '("Citigroup" OR "Bank of America" ) AND "shares" NOT "sell"'} Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase https://www.informnny.com/news/business/press-releases/cision/20221128NY47953/bank-of-america-corporation-announces-hypothetical-accrued-dividends-and-hypothetical-total-consideration-for-libor-depositary-shares-sought-in-its-cash-tender-offers-and-amendments-to-the-offer-to-pu/ ('Bank of America Corporation Announces Hypothetical Accrued Dividends and ' 'Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its ' 'Cash Tender Offers and Amendments to the Offer to Purchase') Fetched 28 stories. Total story count so far: 28 Fetched 0 stories. Total story count so far: 28 ************ Fetched 28 stories ************ Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer t... Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase BRIEF-Bank Of America Corporation Announces Hypothetical Accrued Dividends And Hypothetical Total Consideration For LIBOR Depositary Shares Sought In Its Cash Tender Offers And Amendments To Offer To Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Strategic Blueprint LLC Acquires 447 Shares of Bank of America Co. (NYSE:BAC) Aramco unit hires HSBC, Citigroup for Riyadh share sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Westover Capital Advisors LLC Acquires New Shares in Bank of America Co. (NYSE:BAC) Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Saudi Aramco Base Oil hires HSBC, Citigroup for Riyadh share sale Aramco Unit Hires HSBC, Citigroup for $1 Billion Share Sale Aramco Unit Hires HSBC, Citigroup for $1 Billion Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale (1) Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Eubel Brady & Suttman Asset Management Inc. Purchases 3,906 Shares of Citigroup Inc. (NYSE:C) Bank of America Co. (NYSE:BAC) Shares Sold by Robertson Stephens Wealth Management LLC Eubel Brady & Suttman Asset Management Inc. Buys 3,906 Shares of Citigroup Inc. (NYSE:C)
We can see that we can refine our query by adding Boolean operators to our keyword search. However, this can become more complicated if we want to cast our net wider. For instance, let's say we want to pull stories about the banking sector in general. Rather than writing a complicated keyword search, we can search by a news category.
AYLIEN'S NLP enrichment classifies stories into categories to allow us to make more powerful searches. Our classifier is capable of classifying content into two taxonomies where a code corresponds with a a subject. Learn more here.
Here, we will search for all stories classified as "banking" (04006002) using the IPTC subject taxonomy. You can search for other IPTC codes here.
Many stories will be categorised under "banking", so we will restrict our output to the first 100.
We can also perform categorial search using the IAB taxonomy or the AYLIEN Smart Tagger which will be discussed later.
# define the query parameters
params = {
'language': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'categories.taxonomy[]': 'iptc-subjectcode',
'categories.id[]': ['04006002'],
'cursor': '*',
'per_page' : 10
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print('')
{'language': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'categories.taxonomy[]': 'iptc-subjectcode', 'categories.id[]': ['04006002'], 'cursor': '*', 'per_page': 10} {'categories.id[]': ['04006002'], 'categories.taxonomy[]': 'iptc-subjectcode', 'cursor': '*', 'language': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS'} Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago https://journaltimes.com/lifestyles/health-med-fit/wall-street-slips-as-lockdown-protests-spread-in-china/article_81621f36-0538-520e-9234-9e04289ca9ab.html Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago 'The Bank of Canada Still Has Your Back, But It's Got a Knife In It': Experts Weigh In On Market Future Teton Advisors Inc. Lowers Holdings in Value Line, Inc. (NASDAQ:VALU) Safra New York Corporation To Acquire Delta National Bank and Trust The Bank-Run Phenomenon 684. News: Daylight builds for the LGBTQ+ community and the FCA hits back at trading apps Safra New York Corporation To Acquire Delta National Bank and Trust Nigerian man flaunts over N1m saved in his piggy bank after he stopped doing 9k weekly data sub Get £175 for switching to Halifax…but there's a catch Keith Ligori
You may find you want to sort your query response by some metric. In the examples above, we have taken the top N stories.
These have been sorted - by default - by published date i.e. we are getting the most recent N stories that meet our search criteria.
Sorting the query response is particularly useful when many stories meet our search criteria but we only want N stories. For example, say 1,000 stories met our search criteria - we could sort these stories by a range of metrics and return the top N.
We can use the following paramters to sort our response by:
You can read more about sorting in our docs.
The sort order by default is descending, but we can explictly state which direction we want to sort by using the 'sort_by' parameter.
In the following example, we perform a keyword search and sort by keyword relevance.
params = {
'language': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'text' : 'Microsoft AND (merge OR acquire)',
'cursor': '*',
'per_page' : 10,
'sort_by' : 'relevance'
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print('')
{'language': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'text': 'Microsoft AND (merge OR acquire)', 'cursor': '*', 'per_page': 10, 'sort_by': 'relevance'} {'cursor': '*', 'language': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'sort_by': 'relevance', 'text': 'Microsoft AND (merge OR acquire)'} In transaction documents between Microsoft COR and Activision Blizzard, Inc date of an exit of The Elder Scrolls 6] was foun https://news.myseldon.com/en/news/index/275465595 Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories In transaction documents between Microsoft COR and Activision Blizzard, Inc date of an exit of The Elder Scrolls 6] was foun New Microsoft partnership to drive technical growth at MTN Group Autonomy Orders 2,500 VinFast VF 8 And VF 9 Electric Cars Microsoft Reported To Extend Call Of Duty Multiplatform Release To PlayStation For Ten Years Where to get a Choice Specs in Pokémon Scarlet and Violet How to get a Choice Specs in Pokémon Scarlet and Violet Sony wanted to bring PlayStation Plus to Xbox, but Microsoft “wouldn't let it happen,” says SIE Joe Jonas urges people to 'check in' with themselves and their friends 6 biggest deal reports this week: Manchester United open to selling the club By Finally! Microsoft Reveals Why It Prefers Elder Scrolls 6 as Xbox Activision Blizzard, Inc and Microsoft COR accused of arrangement and falsification of the transaction on merge for $69 billion
The AYLIEN Query Language (or AQL), is AYLIEN's custom 'flavour' of the Lucene syntax that enables users to make more powerful queries on our data.
Queries in this syntax are made within an 'aql' parameter.
AQL enables us to perform more sophisticated searches like boosting the importance of keywords and enhanced entity search.
When making a query with many keywords, sometimes one keyword in is more important to your search than others. Boosting enables you to add weight to the more important keyword/keywords so that results mentioning these keywords are given a “boost” to get them higher in the results order.
For example, searching ["John", "Frank", "Sarah"] gives equal weight to each term, but ["John", "Frank"^2, "Sarah"] is like saying a mention of “Frank” is twice as important as a mention of “John” or “Sarah”. Stories mentioning “Frank” will therefore appear higher in the rank of search results. We can reduce the importance of a keyword by attributing a decimal number e.g. 0.5.
Boosting is not the definitive keyword search input, simply allows the user to specify the preponderant keywords in a list (i.e. if a story contains many mentions of non-boosted searched keywords, it could still be returned ahead of many stories that mention a boosted keyword). Boosting therefore does not exclude stories from the results, it only affects the order of returned results.
The boost is allocated using the ^ symbol.
In the example below, we search for a wide variety of keywords but give special significance to the "radioactive" keyword.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'title:(("toxic" "chemical" "industrial" "radioactive"^10 "sewerage") AND ("spill" "leak" "dump" "disaster" "contaminate" "waste" "pollute"))'
, 'sort_by' : 'relevance'
}
stories = get_top_ranked_stories(params, 10)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
{'aql': 'title:(("toxic" "chemical" "industrial" "radioactive"^10 "sewerage") ' 'AND ("spill" "leak" "dump" "disaster" "contaminate" "waste" ' '"pollute"))', 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH', 'sort_by': 'relevance'} Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005779051 Fetched 10 stories. Total story count so far: 10 ############# Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005779051 Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005772026 Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005772025 Norway-Kaupanger: Radioactive-, toxic-, medical- and hazardous waste services https://ted.europa.eu/udl?uri=TED:NOTICE:656593-2022:TEXT:EN:HTML State must stop plan to dump radioactive water Ukraine shelled radioactive waste storage – official https://www.rt.com/russia/566874-zaporozhye-waste-storage-shelled/?utm_source=rss&utm_medium=rss&utm_campaign=RSS UN NUCLEAR CHIEF DECLARES RADIOACTIVE WASTE RECYCLING DIFFICULT Chinese Radiation Protection Res Institute Seeks Patent for Radioactive Waste Resin Dehydration Metering Feeding Device EDF says radioactive leak at Civaux reactor not due to... https://www.dailymail.co.uk/wires/reuters/article-11403041/EDF-says-radioactive-leak-Civaux-reactor-not-welding.html?ns_mchannel=rss&ns_campaign=1490&ito=1490 Chinese Radiation Protection Res Institute Submits Chinese Patent Application for Radioactive Waste Resin Wet Oxidation Device
Frequently, keywords of interest to us are mentioned in varying sequences of terms. For example, HSBC's division in China could appear in multiple forms: “HSBC China”, “HSBC’s branches in China”, “In China, HSBC is introducing new…” , etc.
Proximity search is a feature that enables user to broaden the search criteria to return these combinations. “Proximity” refers to the distance, in terms, between two searched terms in a story. For example, "HSBC China"~5 only returns stories that mention "HSBC" and "China", where there is a maximum of four words in between them.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
#, 'body': 'HSBC AND China'
, 'body': '"HSBC China"~4'
, 'sort_by' : 'relevance'
, 'language[]' : ['en']
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
keywords = ["HSBC"]
for story in stories:
print('##################')
print(story['published_at'])
print(story['id'])
print(story['title'])
print(story['words_count'])
print(story['links']['permalink'])
for item in keywords:
print('Keyword mention:')
print_keyword_mention(story, 'body', item)
print()
{'body': '"HSBC China"~4', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS', 'sort_by': 'relevance'} Mercedes-Benz becomes 1st MNC to issue green Panda bond in China http://www.shine.cn/biz/finance/2211283456/ Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T13:26:26Z 5079173598 Mercedes-Benz becomes 1st MNC to issue green Panda bond in China 330 http://www.shine.cn/biz/finance/2211283456/ Keyword mention: HSBC announced it has helped Mercedes-Benz to issue 500 ################## 2022-11-28T02:35:04Z 5078530062 HSBC's smart supply chain breaks the circle again to empower the digital future 1620 https://www.tellerreport.com/business/2022-11-28-hsbc-s-smart-supply-chain-breaks-the-circle-again-to-empower-the-digital-future.HyVx3aq-Po.html Keyword mention: reasonable growth of the quantity" is the goal of the future supply chain development. Among them, HSBC China won the regional awards of "Best Digital Tra ################## 2022-11-28T15:50:50Z 5079379311 October 31 Asia bond pipeline: What's coming up? 1277 Keyword mention: (Asia), CMB Wing Lung, Citi, CMBC, CEB, CTBC, China PA Securities, Guotai Junan, Guosen Securities, HSBC Huatai Intl, Haitong Intl, ICBC (Asia), Industria ################## 2022-11-28T15:54:58Z 5079383946 October 27 Asia bond pipeline: What's coming up? 1251 Keyword mention: Price to be set by Dutch auction | Tender deadline November 3 HSBC (Dealer manager) | Kroll Issuer Services (Tender a ################## 2022-11-28T15:52:27Z 5079381149 October 28 Asia bond pipeline: What's coming up? 1303 Keyword mention: (Asia), CMB Wing Lung, Citi, CMBC, CEB, CTBC, China PA Securities, Guotai Junan, Guosen Securities, HSBC Huatai Intl, Haitong Intl, ICBC (Asia), Industria
AYLIEN leverages two industry standard taxonomies in our news categorisation but we also leverage our own propriertary taxonomy - the Smart Tagger.
Smart Tagger leverages state-of-the-art classification models that have been built using a vast collection of manually tagged news articles based on domain-specific industry and topical taxonomies. Smart Tagger uses a highly effective rule-based classification system for identifying categorical and industry-related news content.
As part of the Smart Tagger update we’re introducing 2 new classification taxonomies; the AYLIEN Industry Taxonomy and the AYLIEN Category Taxonomy, which incorporates 2 curated category groupings; Adverse Events and Trading Impact Events.
You can explore these taxonomies here.
A wide and deep collection of topical categories covering popular topics specifically curated for the business and finance world.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:"Environmental, Social and Governance"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:"Environmental, Social and ' 'Governance"}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot http://activerain.com/blogsview/5760941/nov-2022-wisconsin-forestland-sold-report-vernon-county--hunting--timber--investments--market-snapshot#article-comments-section Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot ################## 2022-11-28T16:58:33Z Residents urged to donate to Christmas clothing drive ################## 2022-11-28T16:58:12Z COP27 climate alarmists see oil demand hitting 18-year highs ################## 2022-11-28T16:57:50Z Tata Communications and Intertec Systems expand partnership, set up Cyber Security Operations Centre in UAE ################## 2022-11-28T16:57:18Z City of Houston Is Under a Water Boil Advisory, Affecting Millions
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND id:ay.lifesoc.esg}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND id:ay.lifesoc.esg}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot http://activerain.com/blogsview/5760941/nov-2022-wisconsin-forestland-sold-report-vernon-county--hunting--timber--investments--market-snapshot#article-comments-section Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot ################## 2022-11-28T16:58:12Z COP27 climate alarmists see oil demand hitting 18-year highs ################## 2022-11-28T16:58:01Z Leeward Renewable Energy closes funding for US solar projects ################## 2022-11-28T16:57:50Z Tata Communications and Intertec Systems expand partnership, set up Cyber Security Operations Centre in UAE ################## 2022-11-28T16:57:18Z City of Houston Is Under a Water Boil Advisory, Affecting Millions
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:"Disasters"}} NOT categories:{{taxonomy:aylien AND label:"Philanthropy"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:"Disasters"}} NOT ' 'categories:{{taxonomy:aylien AND label:"Philanthropy"}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Hawaii's Mauna Loa starts to erupt, sending ash nearby https://www.chronicle-tribune.com/news/wire/hawaii-s-mauna-loa-starts-to-erupt-sending-ash-nearby/article_dd2bdac5-bc7e-56e0-9f6e-3407d90865e4.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z Hawaii's Mauna Loa starts to erupt, sending ash nearby ################## 2022-11-28T16:58:41Z Landslide kills at least 14 attending funeral in Cameroon capital | CNN ################## 2022-11-28T16:57:11Z Hawaii's Mauna Loa volcano starts to erupt, sending ash nearby ################## 2022-11-28T16:56:07Z Hawaii's Mauna Loa, the world's largest active volcano, erupted for the first time in nearly 40 years ################## 2022-11-28T16:55:56Z Mauna Loa is erupting for the first time since 1984, prompting an ashfall advisory for Hawaii's Big Island
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:("Disasters" "Fraud")}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:("Disasters" "Fraud")}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Hawaii's Mauna Loa starts to erupt, sending ash nearby https://www.chronicle-tribune.com/news/wire/hawaii-s-mauna-loa-starts-to-erupt-sending-ash-nearby/article_dd2bdac5-bc7e-56e0-9f6e-3407d90865e4.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z Hawaii's Mauna Loa starts to erupt, sending ash nearby ################## 2022-11-28T16:58:41Z Landslide kills at least 14 attending funeral in Cameroon capital | CNN ################## 2022-11-28T16:58:10Z Cuomo-era New York corruption cases go before U.S. Supreme Court ################## 2022-11-28T16:57:11Z Hawaii's Mauna Loa volcano starts to erupt, sending ash nearby ################## 2022-11-28T16:56:46Z Irishman who stole €185,000 in social welfare payments says 'it was a victimless crime' More for you React Comments | 12
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:(Disasters) AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:(Disasters) AND score:[0.7 TO ' '*] sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Some without water as sinkhole opens ground under GA truck | Columbus Ledger-Enquirer https://www.ledger-enquirer.com/news/state/georgia/article268916217.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T18:08:42Z Some without water as sinkhole opens ground under GA truck | Columbus Ledger-Enquirer ################## 2022-11-18T17:07:27Z Wildfires often lead to dust storms – and they’re getting bigger ################## 2022-11-18T17:50:20Z Earthquake of magnitude 6.9 shakes Indonesia ################## 2022-11-18T17:30:05Z Strong earthquake shakes western Indonesia; no tsunami alert ################## 2022-11-18T17:28:58Z When Is Hurricane Season In Florida And How To Prepare For It
A robust collection of multilevel tags that represent the industry a news article is covering.
Users can seach for Industry verticals using similar syntax as AYLIEN Categories.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'industries: {{"Coal Mining" "Agriculture and Fishing" AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'industries: {{"Coal Mining" "Agriculture and Fishing" AND score:[0.7 ' 'TO *] sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators https://agrospectrumindia.com/2022/11/18/13th-agrovision-to-promote-agritech-startups-grassroot-innovators-though-special-pavilion.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T17:55:02Z 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators ################## 2022-11-18T17:56:29Z Kirin Holdings - Chateau Mercian Mariko Winery Chosen Yet Again By 'World's Best Vineyards 2022' ################## 2022-11-18T17:05:31Z Report suggests big changes for ag in Upper Rio Grande River basin ################## 2022-11-18T18:20:16Z Worldwide Microgreens Industry to 2027 - by Type, Farming Technique, Growth Medium, Distribution Channel, End-use, Company and Region ################## 2022-11-18T18:22:28Z Markham Vineyards Reopens Historic Tasting Room After Extensive Renovations
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'industries: {{in.mat.coalmine in.agfish AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'industries: {{in.mat.coalmine in.agfish AND score:[0.7 TO *] ' 'sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators https://agrospectrumindia.com/2022/11/18/13th-agrovision-to-promote-agritech-startups-grassroot-innovators-though-special-pavilion.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T17:55:02Z 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators ################## 2022-11-18T17:31:11Z Developments In the World of Fishing Sonar ################## 2022-11-18T17:56:29Z Kirin Holdings - Chateau Mercian Mariko Winery Chosen Yet Again By 'World's Best Vineyards 2022' ################## 2022-11-18T18:05:27Z After 7,000 years, Turkish wines are hitting the big time ################## 2022-11-18T18:05:14Z Soft lending for Russian agriculture to grow nearly twofold in 2022 to 177 bln rubles - AgMin
Similarly, we may be interested in searching for certain recurring subjects appearing in the news for example, banks, companies, dogs or even aliens! We could do this using keyword search but AYLIEN provides a solution to this problem by classifying some words as "enties".
What is an entity? The Oxford English Dictionary provides a basic starting point of what an entity is, with its definition being "a thing with distinct and independent existence". Learn more about searching for entities here.
We can use entity types to search for groups of entities without the need for defining an exhaustive list of DBPedia links.
Returning to our query that pulled stories classifed as "banking", let's pull all articles categorised as banking that also feature a "Company" or "Bank" entity type in the title:
N.B. AYLIEN's knowlede base switched from using DBPedia (V2 entities) to Wikidata (V3 entities) in February 2021. If you recquire syntax relating to V2, please contact sales@aylien.com.
# define the query parameters
params = {
'language[]': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'categories.taxonomy': 'iptc-subjectcode',
'categories.id[]': ['04006002'],
'entities.title.type[]': ["Company", "Bank"],
'cursor': '*',
'per_page' : 10
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
{'language[]': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'categories.taxonomy': 'iptc-subjectcode', 'categories.id[]': ['04006002'], 'entities.title.type[]': ['Company', 'Bank'], 'cursor': '*', 'per_page': 10} {'categories.id[]': ['04006002'], 'categories.taxonomy': 'iptc-subjectcode', 'cursor': '*', 'entities.title.type[]': ['Company', 'Bank'], 'language[]': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS'} Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago https://journaltimes.com/lifestyles/health-med-fit/wall-street-slips-as-lockdown-protests-spread-in-china/article_81621f36-0538-520e-9234-9e04289ca9ab.html Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories
Let's look closely at the first story in this output and review the entities in the title.
Note, some entities will be linked to a Wikiedata URLs. AYLIEN uses Wikidata to train a vast knowledge base in order to identify entities.
Other entities may not be linked to a DBPedia URL. AYLEIN also utilises a Named Entity Recognisition Model to identify entities in cases where they can't be identified from the knowledge base.
for story in stories[0:1]:
print(story['title'])
print('##############################################')
for entity in stories[0]['entities'][0:5]:
pprint(entity)
print()
Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago ############################################## {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 85, 'start': 72}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 400, 'start': 387}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 856, 'start': 843}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 1212, 'start': 1199}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4601, 'start': 4588}, 'sentiment': {'confidence': 0.77, 'polarity': 'positive'}}], 'text': 'KEB Hana Bank'}]}, 'external_ids': {}, 'id': 'Q484047', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q484047', 'wikipedia': 'https://en.wikipedia.org/wiki/Hana_Bank'}, 'overall_frequency': 5, 'overall_prominence': 0.97, 'overall_sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 107, 'start': 102}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 422, 'start': 417}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 878, 'start': 873}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1234, 'start': 1229}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4623, 'start': 4618}, 'sentiment': {'confidence': 0.69, 'polarity': 'positive'}}], 'text': 'Seoul'}]}, 'external_ids': {}, 'id': 'Q8684', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q8684', 'wikipedia': 'https://en.wikipedia.org/wiki/Seoul'}, 'overall_frequency': 5, 'overall_prominence': 0.93, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['City', 'Location', 'Organization', 'Community']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 120, 'start': 109}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 435, 'start': 424}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 891, 'start': 880}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1247, 'start': 1236}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4636, 'start': 4625}, 'sentiment': {'confidence': 0.68, 'polarity': 'positive'}}], 'text': 'South Korea'}, {'frequency': 1, 'mentions': [{'index': {'end': 762, 'start': 750}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}], 'text': 'South Korean'}]}, 'external_ids': {}, 'id': 'Q884', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q884', 'wikipedia': 'https://en.wikipedia.org/wiki/South_Korea'}, 'overall_frequency': 6, 'overall_prominence': 0.92, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N279424833613967807707022612475825359786', 'overall_frequency': 1, 'overall_prominence': 0.91, 'overall_sentiment': {'confidence': 0.55, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.55, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 79, 'start': 63}, 'sentiment': {'confidence': 0.55, 'polarity': 'neutral'}}], 'text': 'DAMIAN J. TROISE'}]}, 'types': ['Human']}
Depending on your query, we should see that the classifier picked up some entities. We can also see some of the entities are linked to Wikidata URLs — we will return to this below.
We are not limited to working with entities in the title however. We can also search for entities in the body of the article. Let's print out the first 10 entities in the body. We can see that AYLIEN's enrichment process identifies a whole range of entity types.
for story in stories[0:1]:
print(story['title'])
print('##############################################')
for entity in stories[0]['entities'][0:3]:
for surface_form in entity['body']['surface_forms']:
pprint(entity)
print()
Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago ############################################## {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 85, 'start': 72}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 400, 'start': 387}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 856, 'start': 843}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 1212, 'start': 1199}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4601, 'start': 4588}, 'sentiment': {'confidence': 0.77, 'polarity': 'positive'}}], 'text': 'KEB Hana Bank'}]}, 'external_ids': {}, 'id': 'Q484047', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q484047', 'wikipedia': 'https://en.wikipedia.org/wiki/Hana_Bank'}, 'overall_frequency': 5, 'overall_prominence': 0.97, 'overall_sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 107, 'start': 102}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 422, 'start': 417}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 878, 'start': 873}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1234, 'start': 1229}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4623, 'start': 4618}, 'sentiment': {'confidence': 0.69, 'polarity': 'positive'}}], 'text': 'Seoul'}]}, 'external_ids': {}, 'id': 'Q8684', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q8684', 'wikipedia': 'https://en.wikipedia.org/wiki/Seoul'}, 'overall_frequency': 5, 'overall_prominence': 0.93, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['City', 'Location', 'Organization', 'Community']}
We have seen how AYLIEN's NLP enrichment identifies entities and that some entities are tagged with a Wikidata URLs. Entities can be useful when a keyword or search term can refer to multiple entities. For example, let's imagine we are interested in finding news regarding the company, Apple — how do we restrict searches for the company only and ignore searches for the fruit? We could search for the keyword "Apple" and also search for company entity types as described above, but then we would run the risk of returning titles that include companies other than Apple Inc. but that mention the fruit, apple. We can, however, perform a more specific search using Wikidata and Wikipedia URLs.
Wikidata is a semantic web project that extracts structured information created as part of the Wikipedia project where distinct entities are referred to by URIs (like https://en.wikipedia.org/wiki/Apple_Inc. and https://www.wikidata.org/wiki/Q312). Using these URIs, we can perform very specific searches for topics and reduce the ambiguity in our query. Searching by URI will also identify different surface forms that link to Apple e.g. "Apple", "Apple Inc." and the Apple stock ticker, "AAPL".
Below, we'll demonstrate a search for Citigroup using its Wikiedpia URL.
N.B. AYLIEN's knowlede base switched from using DBPedia (V2 entities) to Wikidata (V3 entities) in February 2021. If you recquire syntax relating to V2, please contact sales@aylien.com.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{links.wikipedia:"https://en.wikipedia.org/wiki/Citigroup" }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Citigroup')
print()
{'aql': 'entities: {{links.wikipedia:"https://en.wikipedia.org/wiki/Citigroup" ' '}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} Wells Fargo & Company MN Sells 97,148 Shares of Analog Devices, Inc. (NASDAQ:ADI) https://www.dispatchtribunal.com/2022/11/28/wells-fargo-company-mn-sells-97148-shares-of-analog-devices-inc-nasdaqadi.html Fetched 5 stories. Total story count so far: 5 ############# Wells Fargo & Company MN Sells 97,148 Shares of Analog Devices, Inc. (NASDAQ:ADI) https://www.dispatchtribunal.com/2022/11/28/wells-fargo-company-mn-sells-97148-shares-of-analog-devices-inc-nasdaqadi.html Keyword mention: wn 85.22% of the company’s stock. A number of research analysts have issued reports on ADI shares. Citigroup upped their target price on shares of Analog Devic KPMG bets on Manchester with tech jobs and 'sprint' rooms https://www.accountingtoday.com/articles/kpmg-bets-on-manchester-with-tech-jobs-and-sprint-rooms Keyword mention: he latest international firm to grow beyond London, with banks such as Goldman Sachs Group Inc. and Citigroup Inc. finding it easier to secure lower costs and s Beaird Harris Wealth Management LLC Has $138,000 Holdings in DTE Energy (NYSE:DTE) https://www.com-unik.info/2022/11/28/beaird-harris-wealth-management-llc-has-138000-holdings-in-dte-energy-nysedte.html Keyword mention: erts: Wall Street Analysts Forecast Growth A number of research firms have issued reports on DTE. Citigroup cut their price target on DTE Energy from $146.00 Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Keyword mention: to $50.00 and gave the stock an “underweight” rating in a research note on Wednesday, August 17th. Citigroup boosted their target price on shares of World Wres Gamco Investors INC. ET AL Raises Stock Position in Tredegar Co. (NYSE:TG) https://www.themarketsdaily.com/2022/11/28/gamco-investors-inc-et-al-raises-stock-position-in-tredegar-co-nysetg.html Keyword mention: rials company’s stock valued at $435,000 after buying an additional 1,826 shares during the period. Citigroup Inc. boosted its stake in Tredegar by 9.4% during
We can search for entities using their Wikidata ID as per below.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{links.wikidata:"https://www.wikidata.org/wiki/Q219508" }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Citigroup')
print()
{'aql': 'entities: {{links.wikidata:"https://www.wikidata.org/wiki/Q219508" }}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Fetched 5 stories. Total story count so far: 5 ############# Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Keyword mention: to $50.00 and gave the stock an “underweight” rating in a research note on Wednesday, August 17th. Citigroup boosted their target price on shares of World Wres Gamco Investors INC. ET AL Raises Stock Position in Tredegar Co. (NYSE:TG) https://www.themarketsdaily.com/2022/11/28/gamco-investors-inc-et-al-raises-stock-position-in-tredegar-co-nysetg.html Keyword mention: rials company’s stock valued at $435,000 after buying an additional 1,826 shares during the period. Citigroup Inc. boosted its stake in Tredegar by 9.4% during Coherent slips even as Deutsche Bank upgrades, saying bear case 'not as bad as feared' https://seekingalpha.com/news/3911636-coherent-slips-even-as-deutsche-bank-upgrades-saying-bear-case-not-as-bad-as-feared?utm_source=feed_news_all&utm_medium=referral Keyword mention: Trian Fund Management L.P. Raises Stake in General Electric (NYSE:GE) https://mayfieldrecorder.com/2022/11/28/trian-fund-management-l-p-raises-stake-in-general-electric-nysege.html Keyword mention: to $78.00 and set an “overweight” rating on the stock in a research report on Monday, October 3rd. Citigroup increased their price objective on shares of Gener Pin Oak Investment Advisors Inc. Increases Position in Kimbell Royalty Partners, LP (NYSE:KRP) https://slatersentinel.com/news/2022/11/28/pin-oak-investment-advisors-inc-increases-position-in-kimbell-royalty-partners-lp-nysekrp.html Keyword mention: 's stock. Analyst Upgrades and Downgrades KRP has been the topic of a number of research reports. Citigroup assumed coverage on Kimbell Royalty Partners in a
Sometimes we might want to search for an entity by surface form (i.e. the text metnioned) rather than the wiki ID. This may because we want to limit to a certain surface form (MSFT and not Microsoft) or becuase the entity is not in wikidata and so not in our kenoweldege base. Our Named Entity Recognition model and still recognise entities that are not in wikidata, based on the context of the document. This is useful for searching for lesser known companies, SMEs or start-ups.
In the code below I use the code surface_forms.text - this is a full text search. This means that
In contrast, searching via surface_forms on its own will perform an exact string match search i.e. case sensitive with special characters included.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
#, 'aql': 'entities: {{surface_forms:"Boeing"}}'
, 'aql': 'entities: {{surface_forms.text:"Boeing"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Boeing')
print()
{'aql': 'entities: {{surface_forms.text:"Boeing"}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} ВОЙНА В Украине https://izvestia.kiev.ua/item/show/148204 Fetched 5 stories. Total story count so far: 5 ############# ВОЙНА В Украине https://izvestia.kiev.ua/item/show/148204 Keyword mention: Boeing has proposed to produce small diameter of land bas Russia Won't Stop Strikes until It Runs Out of Missiles, Ukraine's Zelenskiy Says https://english.aawsat.com/home/article/4013126/russia-won%E2%80%99t-stop-strikes-until-it-runs-out-missiles-ukraine%E2%80%99s-zelenskiy-says Keyword mention: . In the latest example of Western military aid to Kyiv, the Pentagon is considering a proposal by Boeing to supply Ukraine with cheap, small precision bomb محلل سياسى: استمرار الحرب الروسية الأوكرانية يضع مستقبل أوروبا على المحك https://www.youm7.com/story/2022/11/28/%D9%85%D8%AD%D9%84%D9%84-%D8%B3%D9%8A%D8%A7%D8%B3%D9%89-%D8%A7%D8%B3%D8%AA%D9%85%D8%B1%D8%A7%D8%B1-%D8%A7%D9%84%D8%AD%D8%B1%D8%A8-%D8%A7%D9%84%D8%B1%D9%88%D8%B3%D9%8A%D8%A9-%D8%A7%D9%84%D8%A3%D9%88%D9%83%D8%B1%D8%A7%D9%86%D9%8A%D8%A9-%D9%8A%D8%B6%D8%B9-%D9%85%D8%B3%D8%AA%D9%82%D8%A8%D9%84-%D8%A3%D9%88%D8%B1%D9%88%D8%A8%D8%A7-%D8%B9%D9%84%D9%89/5992901 Keyword mention: the mental image of how America can help its allies and NATO countries. The Washington study of the Boeing proposal to provide "Keeff" with accurate bombs is USA harkitsee Boeingin ja Saabin kehittämän täsmäpommin lähettämistä Ukrainaan – GLSDB-pommi mahdollistaisi iskut yli 100 km Venäjän selustaan https://www.talouselama.fi/uutiset/usa-harkitsee-boeingin-ja-saabin-kehittaman-tasmapommin-lahettamista-ukrainaan-glsdb-pommi-mahdollistaisi-iskut-yli-100-km-venajan-selustaan/abaa49d0-d08e-492e-97f4-f0a0af921177 Keyword mention: ng sending the GLSDB (Ground-Lunched Small Diamond Bomb) to Ukraine, which was developed jointly by Boeing and Saab. The news agency reports on the nameless Russia won't stop strikes until it runs out of missiles, Ukraine's Zelenskiy says https://nationalpost.com/pmn/news-pmn/russia-wont-stop-strikes-until-it-runs-out-of-missiles-ukraines-zelenskiy-says Keyword mention: . In the latest example of Western military aid to Kyiv, the Pentagon is considering a proposal by Boeing to supply Ukraine with cheap, small precision bomb
We can search for entities using their stock ticker (where supported).
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{stock_ticker:GOOGL }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Google')
print()
{'aql': 'entities: {{stock_ticker:GOOGL }}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} JEPI Vs. SPY: The Relative Lead Unlikely To Continue In 2023 https://seekingalpha.com/article/4560842-jepi-vs-spy-the-relative-lead-unlikely-to-continue-in-2023?source=feed_all_articles Fetched 5 stories. Total story count so far: 5 ############# JEPI Vs. SPY: The Relative Lead Unlikely To Continue In 2023 https://seekingalpha.com/article/4560842-jepi-vs-spy-the-relative-lead-unlikely-to-continue-in-2023?source=feed_all_articles Keyword mention: Investors Increasingly Impatient with Slow Pace of Autonomous Vehicles https://programbusiness.com/news/investors-increasingly-impatient-with-slow-pace-of-autonomous-vehicles/ Keyword mention: ut costs during an economic slowdown. An influential hedge fund has also questioned Alphabet Inc.’s Google s years-long effort to advance self-driving techno Yahoo buys nearly 25% stake in advertising tech firm Taboola https://infotechlead.com/digital/yahoo-buys-nearly-25-stake-in-advertising-tech-firm-taboola-75712 Keyword mention: Allianz Asset Management GmbH Acquires 30,898 Shares of Alphabet Inc. (NASDAQ:GOOG) https://mayfieldrecorder.com/2022/11/28/allianz-asset-management-gmbh-acquires-30898-shares-of-alphabet-inc-nasdaqgoog.html Keyword mention: , Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America. It operates through Google Services, Google Cloud, and Other Bets segments. T SPACs Slap Some Lipstick on Their Penny-Stock Pigs https://medworm.com/1053087611/spacs-slap-some-lipstick-on-their-penny-stock-pigs/ Keyword mention:
Sometimes if we are searching for an entity surface form, we may want to specify the entity type to help identify the correct entity. This may be becuase the entity is not recognised in wikidata and therefore not in the AYLIEN knowledge base.
However, our Named Entity Recognistion model can predict what entity type the entity is (i.e. Person, Organization, Location etc.) even if it is not in wikidata. This enables us to search for entity surface forms and explictly state what type of entity they should be.
Below we searcg for the surface form "Apple" and specify that we are looking for an Organization entity type.
params = {
"aql": "entities:{{surface_forms.text:Apple AND type:Organization}}"
, "categories_taxonomy": "iptc-subjectcode"
, "categories_id": ["04000000"]
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Apple')
print()
{'aql': 'entities:{{surface_forms.text:Apple AND type:Organization}}', 'categories_id': ['04000000'], 'categories_taxonomy': 'iptc-subjectcode', 'language': ['en'], 'per_page': 5} Ahead of Market: 10 things that will decide D-Street action on Tuesday https://economictimes.indiatimes.com/markets/stocks/news/ahead-of-market-10-things-that-will-decide-d-street-action-on-tuesday/articleshow/95835927.cms Fetched 5 stories. Total story count so far: 5 ############# Ahead of Market: 10 things that will decide D-Street action on Tuesday https://economictimes.indiatimes.com/markets/stocks/news/ahead-of-market-10-things-that-will-decide-d-street-action-on-tuesday/articleshow/95835927.cms Keyword mention: r Monday sales were set for a record.The biggest drag on the benchmark S&P 500 index, however, were Apple Inc shares, which fell 1.5% after a report that th WhatsApp Message Yourself feature starts rolling out: Here's how to use it Keyword mention: eature, users must update the WhatsApp app on their smartphone. To do so, head to Google Play Store/Apple App Store and install the latest version of the ap 'A Christmas miracle': Woman kidnapped as child reunites with family 51 years later https://headtopics.com/us/a-christmas-miracle-woman-kidnapped-as-child-reunites-with-family-51-years-later-32220438 Keyword mention: ry isn't too difficult to figure out, the change of pace for a Hallmark movie is welcomed. In 2021, Apple agreed to broadcast A Charlie Brown Christmas on P The Best Cyber Monday deals available now https://headtopics.com/us/the-best-cyber-monday-deals-available-now-32218332 Keyword mention: he 2021 iPad Pro 11-inch with an M1 chip, well, here is the follow-up: you can also get the 2nd gen Apple Pencil that works great with it at a $40 off price Amazon, union organizer head to court over COVID-based class racial-bias lawsuit https://thegrio.com/2022/11/28/amazon-union-organizer-smalls-head-to-court-covid-based-class-racial-bias-lawsuit/ Keyword mention: acility, it would weaken the claims within the racial-bias lawsuit. TheGrio is FREE on your TV via Apple TV, Amazon Fire, Roku and Android TV. Also, please
We can specify where in the article we want to find the entity by specifying the title or body elements.
params = {
"aql": "entities:{{element:title AND surface_forms:Apple}}"
, "categories_taxonomy": "iptc-subjectcode"
, "categories_id": ["04000000"]
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms:Apple}}', 'categories_id': ['04000000'], 'categories_taxonomy': 'iptc-subjectcode', 'language': ['en'], 'per_page': 5} Apple’s Change to AirDrop Is Hurting Chinese Protests https://www.techinvestornews.com/Tech-News/Latest-Headlines/apples-change-to-airdrop-is-hurting-chinese-protests Fetched 5 stories. Total story count so far: 5 ############# Apple’s Change to AirDrop Is Hurting Chinese Protests https://www.techinvestornews.com/Tech-News/Latest-Headlines/apples-change-to-airdrop-is-hurting-chinese-protests The Best Apple Cyber Monday Deals https://headtopics.com/us/the-best-apple-cyber-monday-deals-32218396 Gwyneth Paltrow Reunites With Look-Alike Daughter Apple, 18, In NYC On Teen's College Break: Photos https://www.newsbreak.com/news/2839249825578/gwyneth-paltrow-reunites-with-look-alike-daughter-apple-18-in-nyc-on-teen-s-college-break-photos Why Apple Stock Is Sinking Today https://www.fool.com/investing/2022/11/28/why-apple-stock-is-sinking-today/?source=iedfolrf0000001 Snap up a £30 saving on the Apple Watch ultra this Cyber Monday https://theworldnews.net/gb-news/snap-up-a-ps30-saving-on-the-apple-watch-ultra-this-cyber-monday
We can add logic to search for multiple entities at once. Note in this example we are using the OR operator to search for one of two entities.
params = {
'published_at.start': 'NOW-30DAYS'
, 'published_at.end': 'NOW'
, 'aql': 'entities:{{element:title AND surface_forms: "Deloitte"}} OR entities:{{element:title AND surface_forms: "Accenture"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms: "Deloitte"}} OR ' 'entities:{{element:title AND surface_forms: "Accenture"}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'} BRITISH COLUMBIA INVESTMENT MANAGEMENT Corp Sells 24,262 Shares of Accenture plc (NYSE:ACN) https://baseballnewssource.com/2022/11/28/british-columbia-investment-management-corp-sells-24262-shares-of-accenture-plc-nyseacn/7861473.html Fetched 5 stories. Total story count so far: 5 ############# BRITISH COLUMBIA INVESTMENT MANAGEMENT Corp Sells 24,262 Shares of Accenture plc (NYSE:ACN) https://baseballnewssource.com/2022/11/28/british-columbia-investment-management-corp-sells-24262-shares-of-accenture-plc-nyseacn/7861473.html Tvh оцифровывает свою глобальную сеть складов с помощью Körber и Accenture — Data Intelligence. https://zephyrnet.com/ru/tvh-%D0%BE%D1%86%D0%B8%D1%84%D1%80%D0%BE%D0%B2%D1%8B%D0%B2%D0%B0%D0%B5%D1%82-%D1%81%D0%B2%D0%BE%D1%8E-%D0%B3%D0%BB%D0%BE%D0%B1%D0%B0%D0%BB%D1%8C%D0%BD%D1%83%D1%8E-%D1%81%D0%B5%D1%82%D1%8C-%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2-%D1%81-%D0%BF%D0%BE%D0%BC%D0%BE%D1%89%D1%8C%D1%8E-korber-%D0%B8-accure-5/ Ensign Peak Advisors Inc Lowers Position in Accenture plc (NYSE:ACN) https://www.americanbankingnews.com/2022/11/28/ensign-peak-advisors-inc-lowers-position-in-accenture-plc-nyseacn.html Purdue, Accenture sign five-year agreement in support of smart manufacturing https://www.purdue.edu/newsroom/releases/2022/Q4/purdue,-accenture-sign-five-year-agreement-in-support-of-smart-manufacturing.html Deloitte mandated for revisited Hassyan IWP
We can also limit to the stories we want by enttiy sentiment, as exemplified below. Here we will search for negative mentions of Citigroup.
params = {
"aql": "entities:{{element:title AND surface_forms:Citigroup AND sentiment:negative}}"
, "publised_at_start": "NOW-10DAYS"
, "period": "+1DAY"
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms:Citigroup AND ' 'sentiment:negative}}', 'language': ['en'], 'per_page': 5, 'period': '+1DAY', 'publised_at_start': 'NOW-10DAYS'} Magna International (NYSE:MGA) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html Fetched 5 stories. Total story count so far: 5 ############# Magna International (NYSE:MGA) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html Magna International (NYSE:MGA) Downgraded by Citigroup https://www.thelincolnianonline.com/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html MacroGenics (NASDAQ:MGNX) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/macrogenics-nasdaqmgnx-downgraded-by-citigroup.html MacroGenics (NASDAQ:MGNX) Downgraded by Citigroup https://www.thelincolnianonline.com/2022/11/27/macrogenics-nasdaqmgnx-downgraded-by-citigroup.html Magna International (NYSE:MGA) Downgraded by Citigroup https://zolmax.com/investing/magna-international-nysemga-downgraded-by-citigroup/8168431.html
Here we will isolate the Citigroup entity in the first story to show it is classified with negative sentiment.
for entity in stories[0]['entities']:
for surface_form in entity['title']['surface_forms']:
if 'Citigroup' in surface_form['text']:
pprint(entity)
{'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 9, 'start': 0}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}], 'text': 'Citigroup'}]}, 'external_ids': {}, 'id': 'Q219508', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q219508', 'wikipedia': 'https://en.wikipedia.org/wiki/Citigroup'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'stock_tickers': ['C'], 'title': {'sentiment': {'confidence': 0.53, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 45}, 'sentiment': {'confidence': 0.53, 'polarity': 'negative'}}], 'text': 'Citigroup'}]}, 'types': ['Business', 'Organization', 'Financial_institution']}
Entity prominence is a measure of how significant a mention of an entity is on a scale of 0-1.
Intuitively - as consumers of news - we know if an entity appears in the title, in the first paragaph or many times in an article, then it is pretty significant. AYLIEN's entioty prominence metric catpures this signficance.
We can use this as a query paramter to filter out insignificant mentions of an entity by setting an entity prominence threshold. We can also sort by entity prominence to see the most significant mentions first. For more ways to sort your query output see here.
params = {
'published_at.start': 'NOW-30DAYS'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{surface_forms: "Citigroup" AND overall_prominence:[0.6 TO *] sort_by(overall_prominence)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##############')
print('Title:')
print_keyword_mention(story, 'title', 'Citigroup')
print()
print('Mention:')
print_keyword_mention(story, 'body', 'Citigroup')
{'aql': 'entities: {{surface_forms: "Citigroup" AND overall_prominence:[0.6 TO ' '*] sort_by(overall_prominence)}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'} Vestmark Advisory Solutions Inc. Purchases 10,915 Shares of Citigroup Inc. (NYSE:C) https://www.themarketsdaily.com/2022/10/29/vestmark-advisory-solutions-inc-purchases-10915-shares-of-citigroup-inc-nysec.html Fetched 5 stories. Total story count so far: 5 ############## Title: 10,915 Shares of Citigroup Inc. (NYSE:C) Mention: Citigroup Inc. (NYSE:C – Get Rating) by 16.5% during the sec ############## Title: Citigroup China Technology Forum 2022 Mention: a new form of trade, but the brutal development of the industry In this context, on October 28, the Citigroup based Changed Opportunities & Technology Forum | 2 ############## Title: Citigroup Lowers Visa (NYSE:V) Price Target to $238.00 Mention: Citigroup from $254.00 to $238.00 in a research note publish ############## Title: OSCO SHIPPING (OTCMKTS:CICOF) Cut to Sell at Citigroup Mention: Citigroup downgraded shares of COSCO SHIPPING ( OTCMKTS:CICO ############## Title: Citigroup Lowers YETI (NYSE:YETI) Price Target to $43.00 Mention: Citigroup from $57.00 to $43.00 in a report issued on Friday
So far we have pulled stories in English only. However, our News API supports 6 native languages and 10 translated languages:
Native Languages:
Translated Languages:
Let's perform a search in some native languages other than English. Here we'll search for stories featuring Citigroup in the title and print the native language title and an English title.
# define the query parameters
params = {
'language': ['de', 'fr', 'it', 'es', 'pt'],
'title': 'Citigroup',
'published_at.start':'NOW-10DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
print(params)
stories = get_top_ranked_stories(params, 100)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print(story['translations']['en']['title'])
print('')
{'language': ['de', 'fr', 'it', 'es', 'pt'], 'title': 'Citigroup', 'published_at.start': 'NOW-10DAYS', 'published_at.end': 'NOW', 'cursor': '*', 'per_page': 50} {'cursor': '*', 'language': ['de', 'fr', 'it', 'es', 'pt'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS', 'title': 'Citigroup'} ¿Carlos Slim e Inbursa abandonan la carrera: ya no comprarán Banamex a Citigroup? https://www.capitalmexico.com.mx/mundo/carlos-slim-e-inbursa-abandonan-la-carrera-ya-no-compraran-banamex-a-citigroup/ Fetched 28 stories. Total story count so far: 28 Fetched 0 stories. Total story count so far: 28 ************ Fetched 28 stories ¿Carlos Slim e Inbursa abandonan la carrera: ya no comprarán Banamex a Citigroup? Are Slim and Inbursa leaving the race: will Banamex no longer buy Citigroup? Citigroup Citigroup Citigroup Citigroup Tesla, Inc. : Citigroup cambia a neutral | MarketScreener Tesla, Inc. : Citigroup changes to neutral | MarketScreener El multimillonario mexicano Carlos Slim descarta comprar Banamex a Citigroup Mexican millionaire Carlos Slim dismisses Banamex to Citigroup México: Banco de Slim se retira de compra de Banamex El grupo financiero Inbursa, del millonario mexicano Carlos Slim, anuncia su retiro del proceso de compra de Banamex, uno de los principales bancos de México que la corporación estadounidense Citigroup espera vender en los próximos meses Associated Press Nov 23, 2022 30 min ago Mexico: Slim Bank withdraws from Banamex The financial group Inbursa, of Mexican millionaire Carlos Slim, announces its withdrawal from the Banamex purchase process, one of Mexico's major banks that the American firm Citigroup expects to sell in the coming months sociated Press Nov 23, 2022 30 min ago México: Banco de Slim se retira de compra de Banamex El grupo financiero Inbursa, del millonario mexicano Carlos Slim, anuncia su retiro del proceso de compra de Banamex, uno de los principales bancos de México que la corporación estadounidense Citigroup espera vender en los próximos meses Associated Press Nov 23, 2022 14 min ago Mexico: Slim Bank withdraws from Banamex The financial group Inbursa, of Mexican millionaire Carlos Slim, announces its withdrawal from the Banamex purchase process, one of Mexico's major banks that the American firm Citigroup expects to sell in the coming months sociated Press Nov 23, 2022 14 min ago Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan DICK'S Sporting Goods, Inc. : El Citigroup continua con su recomendación de compra | MarketScreener DICK'S Sporting Goods, Inc. : The Citigroup continues with its purchase recommendation | MarketScreener El multimillonario mexicano Carlos Slim descarta comprar Banamex a Citigroup Mexican millionaire Carlos Slim dismisses Banamex to Citigroup Reguladores de EE.UU. pidieron a Citigroup mejorar su plan de simulación de quiebras US regulators They asked Citigroup to improve its bankruptcy simulation plan Reguladores instan a Citigroup a corregir plan de quiebra Regulators urge Citigroup to correct bankruptcy plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Dell Technologies Inc. : Citigroup reitera su recomendación de compra | MarketScreener Dell Technologies Inc. : Citigroup reiterates its purchase recommendation | MarketScreener BP plc : Citigroup Cambia su recomendación a compra | MarketScreener BP Plc : Citigroup Change your purchase recommendation | MarketScreener Unity Software Inc. : Obtiene una recomendación de compra de Citigroup | MarketScreener Unit Software Inc. : Get a Citigroup Buy recommendation | MarketScreener Rackspace Technology, Inc. : El Citigroup se mantiene neutral | MarketScreener Rackspace Technology, Inc. : Citigroup remains neutral | MarketScreener Eneti Inc. : Obtiene una recomendación de compra de Citigroup | MarketScreener Eneti Inc. : Get a Citigroup Buy recommendation | MarketScreener Macy's, Inc. : El Citigroup se mantiene neutral | MarketScreener Macy's, Inc. : Citigroup remains neutral | MarketScreener Bath & Body Works, Inc. : recomendación de compra de Citigroup | MarketScreener Bath & Body Works, Inc. : Citigroup's Buy Recommendation | MarketScreener IBEX Limited : Citigroup permanece neutral | MarketScreener IBEX Limited : Citigroup remains neutral | MarketScreener Roblox Corporation : El Citigroup continua con un recomendación de compra | MarketScreener Robles Corporation : Citigroup continues with a shopping recommendation | MarketScreener NetEase, Inc. : Citigroup mantiene su recomendación de compra | MarketScreener NetEase, Inc. : Citigroup maintains its purchase recommendation | MarketScreener Autodesk, Inc. : Citigroup mantiene su recomendación de compra | MarketScreener Autodesk, Inc. : Citigroup maintains its purchase recommendation | MarketScreener HP Inc. : Citigroup se mantiene neutral. | MarketScreener HP Inc. : Citigroup remains neutral. | MarketScreener
Up to now we have interrogated our News API output by converting the JSON objects to Python dictionaries, iterating through them and printing the elements. Sometimes we may wish to view the data in a more tabular format. Below, we will loop through our non-English content stories and create a Pandas dataframe. This will also be useful later when we want to visualize our data.
We'll also pull out some contextual information about each story such as the article's permalink and the stories' sentiment score. AYLIEN's enrichment process predicts the overall sentiment in the body and title of a document as positive, negative and neutral and also outputs a confidence score.
# create dataframe in the format we want
my_columns = ['id', 'title', 'title_eng', 'permalink', 'published_at', 'source', 'body_polarity', 'body_polarity_score']
my_data_frame = []
for story in stories:
# make array of the fields we're interested in
data = [
story['id']
, story['title']
, story['translations']['en']['title']
, story['links']['permalink']
, story['published_at']
, story['source']['domain']
, story['sentiment']['body']['polarity']
, story['sentiment']['body']['score']
]
zipped = zip(my_columns, data)
a_dictionary = dict(zipped)
my_data_frame.append(a_dictionary)
my_data_frame = pd.DataFrame(my_data_frame, columns = my_columns)
my_data_frame.head(5)
id | title | title_eng | permalink | published_at | source | body_polarity | body_polarity_score | |
---|---|---|---|---|---|---|---|---|
0 | 5074728360 | ¿Carlos Slim e Inbursa abandonan la carrera: y... | Are Slim and Inbursa leaving the race: will Ba... | https://www.capitalmexico.com.mx/mundo/carlos-... | 2022-11-24T17:49:13Z | capitalmexico.com.mx | negative | 0.67 |
1 | 5074491026 | Citigroup | Citigroup | https://lado.mx/trending.php?id=5756 | 2022-11-24T14:33:36Z | lado.mx | positive | 0.51 |
2 | 5074310047 | Citigroup | Citigroup | https://www.lado.mx/trending.php?id=5756 | 2022-11-24T12:28:53Z | lado.mx | positive | 0.70 |
3 | 5074108547 | Tesla, Inc. : Citigroup cambia a neutral | Mar... | Tesla, Inc. : Citigroup changes to neutral | M... | https://es.marketscreener.com/cotizacion/accio... | 2022-11-24T09:48:26Z | marketscreener.com | neutral | 0.60 |
4 | 5073894044 | El multimillonario mexicano Carlos Slim descar... | Mexican millionaire Carlos Slim dismisses Bana... | https://palabrasclaras.mx/economia/el-multimil... | 2022-11-24T06:07:00Z | palabrasclaras.mx | negative | 0.58 |
We have seen how we can pull granular stories using the Stories endpoint. However, if we want to investigate volumes of stories over time, we can use the Timeseries endpoint. This endpoint retrieves the stories that meet our criteria and aggregates per minute, hour, day, month, or however we see fit. This can be very usfeul for identifying spikes or dips in news volume relating to a subject of interest. By default, our query below will aggregate the volume of stories per day.
The timeseries endpoint ouputs data in a json format, but out function above will convert this to a pandas dataframe for legibility.
# define the query parameters
params = {
'title': 'Citigroup',
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW',
}
timeseries_data = get_timeseries(params)
timeseries_data
{'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS', 'title': 'Citigroup'} Number of stories returned : 4,765
count | published_at | |
---|---|---|
0 | 114 | 2022-10-29 |
1 | 127 | 2022-10-30 |
2 | 179 | 2022-10-31 |
3 | 185 | 2022-11-01 |
4 | 182 | 2022-11-02 |
5 | 210 | 2022-11-03 |
6 | 216 | 2022-11-04 |
7 | 97 | 2022-11-05 |
8 | 169 | 2022-11-06 |
9 | 152 | 2022-11-07 |
10 | 214 | 2022-11-08 |
11 | 160 | 2022-11-09 |
12 | 145 | 2022-11-10 |
13 | 168 | 2022-11-11 |
14 | 119 | 2022-11-12 |
15 | 138 | 2022-11-13 |
16 | 109 | 2022-11-14 |
17 | 204 | 2022-11-15 |
18 | 213 | 2022-11-16 |
19 | 243 | 2022-11-17 |
20 | 285 | 2022-11-18 |
21 | 121 | 2022-11-19 |
22 | 138 | 2022-11-20 |
23 | 182 | 2022-11-21 |
24 | 166 | 2022-11-22 |
25 | 210 | 2022-11-23 |
26 | 121 | 2022-11-24 |
27 | 86 | 2022-11-25 |
28 | 70 | 2022-11-26 |
29 | 42 | 2022-11-27 |
We can makes sense of timeseries data much quicker if we visualize it. Below, we make use out of Plotly library to visualize the data.
fig = go.Figure( data = go.Scatter(
x = timeseries_data['published_at']
, y=timeseries_data['count']
, line=dict(color='blue')
))
# forrmat the chart
fig.update_layout(
title='Volume of Stories Over Time',
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
)
fig.show()
We can see from the graph that there are various spikes in news volume. We can explore the cause of these spikes by pulling a story that will give us an indication of why Citigroup received so much attention using Alexa Ranking. Alexa Ranking is an estimate of a site's popularity on the internet. Learn more about working with Alexa Ranking here.
Below, we'll identify the three dates with the most stories, then pull the highest ranked story for those dates using the same parameters we used to query the Timeseries endpoint.
# create dataframe to store the label data - note, the publihset_at and count fields are needed for x and y coords.
# the count field will be populated with the total count of stories for each respective day
my_columns = my_columns = ['published_at', 'count', 'title_1', 'title_2', 'title_3']
label_data = pd.DataFrame(columns = my_columns)
# identify the dates with most stories
top_3_dates = timeseries_data.sort_values(by=['count'], ascending = False)[0:3]
# define the query parameters
params = {
'title': 'Citigroup',
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW',
'sort_by' : "source.rankings.alexa.rank"
}
for index, row in top_3_dates.iterrows():
params['published_at.start'] = str(row['published_at'] ) + 'T00:00:00Z'
params['published_at.end'] = str(row['published_at'] + datetime.timedelta(days=1)) + 'T00:00:00Z'
# retirve the top ranked story per date
stories = get_top_ranked_stories(params, 3)
print(stories[0]['published_at'])
data = [[
params['published_at.start'] # include the start date for visualization
, row['count']
# use function to return translated content if ncessary
, stories[0]['title']
, stories[1]['title']
, stories[2]['title']
]]
data = pd.DataFrame(data, columns = my_columns)
label_data = label_data.append(data, sort=True)
label_data
{'published_at.end': '2022-11-19T00:00:00Z', 'published_at.start': '2022-11-18T00:00:00Z', 'sort_by': 'source.rankings.alexa.rank', 'title': 'Citigroup'} Bank of America (NYSE:BAC) Downgraded by Citigroup to “Neutral” https://www.com-unik.info/2022/11/18/bank-of-america-nysebac-downgraded-by-citigroup-to-neutral.html Fetched 10 stories. Total story count so far: 10 2022-11-18T07:41:24Z
<ipython-input-38-b7bd42e78a5b>:37: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
{'cursor': 'OTQzNjA5Miw1MDY2MTAzNTAx', 'published_at.end': '2022-11-18T00:00:00Z', 'published_at.start': '2022-11-17T00:00:00Z', 'sort_by': 'source.rankings.alexa.rank', 'title': 'Citigroup'} Sight Sciences (NASDAQ:SGHT) Price Target Increased to $10.00 by Analysts at Citigroup https://dakotafinancialnews.com/2022/11/17/sight-sciences-nasdaqsght-price-target-increased-to-10-00-by-analysts-at-citigroup.html Fetched 10 stories. Total story count so far: 10 2022-11-17T16:26:46Z {'cursor': 'MjA3NjMyNiw1MDY0MDc3ODUz', 'published_at.end': '2022-11-05T00:00:00Z', 'published_at.start': '2022-11-04T00:00:00Z', 'sort_by': 'source.rankings.alexa.rank', 'title': 'Citigroup'}
<ipython-input-38-b7bd42e78a5b>:37: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
The Berkeley Group (OTCMKTS:BKGFY) Rating Lowered to Neutral at Citigroup https://www.dispatchtribunal.com/2022/11/04/the-berkeley-group-otcmktsbkgfy-rating-lowered-to-neutral-at-citigroup.html Fetched 10 stories. Total story count so far: 10 2022-11-04T05:28:57Z
<ipython-input-38-b7bd42e78a5b>:37: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
count | published_at | title_1 | title_2 | title_3 | |
---|---|---|---|---|---|
0 | 285 | 2022-11-18T00:00:00Z | Bank of America (NYSE:BAC) Downgraded by Citig... | Becton, Dickinson and (NYSE:BDX) Price Target ... | Citigroup Boosts monday.com (NASDAQ:MNDY) Pric... |
0 | 243 | 2022-11-17T00:00:00Z | Sight Sciences (NASDAQ:SGHT) Price Target Incr... | Labor Dept.: Proposed Exemption for Prohibited... | Atlas Copco (OTCMKTS:ATLKY) Raised to “Buy” at... |
0 | 216 | 2022-11-04T00:00:00Z | The Berkeley Group (OTCMKTS:BKGFY) Rating Lowe... | Teck Resources (TSE:TECK.B) Cut to Neutral at ... | Pinterest (NYSE:PINS) Price Target Increased t... |
We will now append these titles to the spikes in the graph we previously created. If we hover over the markers, the tooltip will display the relvant story title.
# split title stings over multiple lines for legibility
split_title_string(label_data, 'title_1')
split_title_string(label_data, 'title_2')
split_title_string(label_data, 'title_3')
trace_1 = go.Scatter(
x = timeseries_data['published_at']
, y=timeseries_data['count']
, name = 'Volume of Stories'
, line=dict(color='blue')
)
trace_2 = go.Scatter(
x = label_data['published_at']
, y = label_data['count']
, mode ='markers'
, marker=dict(size=10,line=dict(width=2, color='blue'), color = 'white')
, text = '<b>' + label_data['count'].astype(str) + '</b><br><br>'
+ label_data['title_1_string'] + '<br><br>'
+ label_data['title_2_string'] + '<br><br>'
+ label_data['title_3_string'] + '<br><br>'
, name = 'Spike lable'
)
data = [trace_1, trace_2]
fig = go.Figure(data=data)
# forrmat the chart
fig.update_layout(
title='Volume of Stories Over Time',
legend = dict(orientation = 'h', y = -0.1),
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
)
fig.show()
We filter our timeseries queries in the same ways as stories, but one filter that is particularly interesting is filtering on sentiment. We have already discussed how stories are given a sentiment score at a granular level and we can use this score to pull volume of stories by title sentiment polarity over time.
In the cell below, we run a function that pulls queries the Timeseries endpoint twice — once for positive sentiment stories and once for negative stories.
# define the query parameters
params = {
'title': 'Citigroup',
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW'
}
polarities = [ 'positive', 'negative']
# Create dataframe to store the outputs
column_names = ["count", "published_at", "sentiment_title_polarity"]
timeseries_sentiment_data = pd.DataFrame(columns = column_names)
for polarity in polarities:
print('===========================================')
print(' ' + polarity + ' sentiment ')
print('===========================================')
params['sentiment_title_polarity'] = polarity
api_response = get_timeseries(params)
#add polarity indicator
api_response['sentiment_title_polarity'] = polarity
timeseries_sentiment_data = timeseries_sentiment_data.append(api_response)
print("Completed")
timeseries_sentiment_data.head()
=========================================== positive sentiment =========================================== {'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS', 'sentiment_title_polarity': 'positive', 'title': 'Citigroup'} Number of stories returned : 4,765 Completed =========================================== negative sentiment =========================================== {'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS', 'sentiment_title_polarity': 'negative', 'title': 'Citigroup'}
<ipython-input-40-5cc3873a9eb7>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Number of stories returned : 4,765 Completed
<ipython-input-40-5cc3873a9eb7>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
count | published_at | sentiment_title_polarity | |
---|---|---|---|
0 | 114 | 2022-10-29 | positive |
1 | 127 | 2022-10-30 | positive |
2 | 179 | 2022-10-31 | positive |
3 | 185 | 2022-11-01 | positive |
4 | 182 | 2022-11-02 | positive |
colours = {
'positive' : 'green'
, 'positive_opaque' : 'rgba(138, 190, 6, 0.05)'
, 'negative' : 'red'
, 'negative_opaque' : 'rgba(228, 42, 58, 0.05)'
, 'neutral' : 'rgb(40, 56, 78)'
, 'neutral_opaque' : 'rgba(40, 56, 78, 0.05)'
}
# we will plot two subplots using the same axes
fig = make_subplots(rows=1, cols=1)
counter = 0
# loop over postive and negative sentiment data to generate to line graphs
# start of for loop =======================================================================================
for polarity in polarities:
if polarity == 'negative':
# multiply absolute number of stories by -1 to visualize negative sentiment stories
factor = -1
else:
factor = 1
# filter to the data we want to visualize based on sentiment
data = timeseries_sentiment_data[timeseries_sentiment_data.sentiment_title_polarity == polarity]
fig.append_trace(go.Scatter(
x = data['published_at']
, y = data['count']*factor
, mode = 'lines'
, name = 'Vol. stories '+polarity
, line = dict(color = colours[polarity], width=1)
, fill = 'tozeroy'
, fillcolor = colours[polarity + "_opaque"]
, hovertemplate = '<b>Date</b>: %{x}<br>'
+'<b>Stories</b>: %{y}'
)
, col = 1
, row = 1)
# end of for loop =======================================================================================
# forrmat the chart
fig.update_layout(
title='Volume of Positive & Negative Sentiment Stories Over Time',
legend = dict(orientation = 'h', y = -0.1),
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
)
fig.show()
We can also track entity level sentiment over time.
sentiments = [1, 0, -1]
sentiments = ['positive', 'neutral', 'negative']
# create dataframe in the format we want
my_columns = ['count', 'published_at', 'sentiment']
my_data_frame = pd.DataFrame(columns = my_columns)
print('Running...')
for sentiment in sentiments:
params = {
'published_at.start': 'NOW-30DAYS'
, 'published_at.end': 'NOW'
, 'period' : '+7DAYS'
, "aql": "entities:{{element:title AND surface_forms:Citigroup AND sentiment:" + str(sentiment) + "}}"
}
timeseries = get_timeseries(params)
timeseries['sentiment'] = sentiment
my_data_frame = my_data_frame.append(timeseries)
my_data_frame = my_data_frame.reset_index(drop = True)
print('Complete')
Running... {'aql': 'entities:{{element:title AND surface_forms:Citigroup AND ' 'sentiment:positive}}', 'period': '+7DAYS', 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'} Number of stories returned : 245 {'aql': 'entities:{{element:title AND surface_forms:Citigroup AND ' 'sentiment:neutral}}', 'period': '+7DAYS', 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'}
<ipython-input-42-1bab8c1a0cf9>:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Number of stories returned : 5,390 {'aql': 'entities:{{element:title AND surface_forms:Citigroup AND ' 'sentiment:negative}}', 'period': '+7DAYS', 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'}
<ipython-input-42-1bab8c1a0cf9>:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
Number of stories returned : 268 Complete
<ipython-input-42-1bab8c1a0cf9>:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
# we will plot two subplots using the same axes
fig = make_subplots(rows=1, cols=1)
colours = {'positive': 'green', 'neutral':'#A2CAE5', 'negative':'red'}
# loop over postive and negative sentiment data to generate to line graphs
# start of for loop =======================================================================================
for sentiment in sentiments:
mask = (my_data_frame['sentiment'] == sentiment)
data = my_data_frame.loc[mask]
fig.add_trace(
go.Scatter(
x = data['published_at']
, y = data['count']
, line = dict(color = colours[sentiment], width=1)
, mode = 'lines'
, name = sentiment
, showlegend = True
)
, row=1
, col=1
)
# end of for loop =======================================================================================
# forrmat the chart
fig.update_layout(
title='Sentiment Over Time',
legend = dict(orientation = 'h', y = -0.05),
plot_bgcolor='white'
)
fig.show()
Similar to the Timeseries endpoint, we may be interested in seeing themes and patterns over time that aren't immediately apparent when looking at individual stories. The Trends endpoint allows us to see the most frequently recurring entities, concepts or keywords that appear in articles that meet our search criteria.
Below we will pull the most frequently occuring entities in the body of stories mentioning Citigroup over a month.
Note- this query will take longer to run than previous endpoints as the News API is performing analysis on all entities included in all the stories that meet our search citeria.
# define the query parameters
params = {
'title': 'Citigroup',
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW',
'field' : 'entities.body.surface_forms.text'
}
print("Running...")
trends = get_trends(params)
print("Completed")
pprint(trends)
Running... Completed {'field': 'entities.body.surface_forms.text', 'published_at.end': '2022-11-28T17:03:04.054697Z', 'published_at.start': '2022-10-29T17:03:04.054631Z', 'trends': [{'count': 4529, 'value': 'Citigroup'}, {'count': 3415, 'value': 'MarketBeat.com'}, {'count': 2425, 'value': 'Fly'}, {'count': 1900, 'value': 'NYSE'}, {'count': 1651, 'value': 'Receive News & Ratings'}, {'count': 1206, 'value': 'Morgan Stanley'}, {'count': 1183, 'value': 'MarketBeat'}, {'count': 1174, 'value': 'United States'}, {'count': 1100, 'value': 'NASDAQ'}, {'count': 1033, 'value': 'Hold'}, {'count': 1024, 'value': 'Barclays'}, {'count': 1011, 'value': 'Moderate Buy'}, {'count': 1005, 'value': 'PE'}, {'count': 999, 'value': 'LLC'}, {'count': 996, 'value': 'Shares'}, {'count': 955, 'value': 'SEC'}, {'count': 862, 'value': 'EPS'}, {'count': 833, 'value': 'Inc'}, {'count': 829, 'value': 'Europe'}, {'count': 814, 'value': 'JPMorgan Chase & Co.'}, {'count': 774, 'value': 'Goldman Sachs Group'}, {'count': 690, 'value': 'Credit Suisse Group'}, {'count': 688, 'value': 'Deutsche Bank'}, {'count': 670, 'value': 'Securities & Exchange Commission'}, {'count': 535, 'value': 'Hedge'}, {'count': 505, 'value': 'North America'}, {'count': 496, 'value': 'Africa'}, {'count': 496, 'value': 'PEG'}, {'count': 490, 'value': 'Royal Bank of Canada'}, {'count': 463, 'value': 'the Middle East'}, {'count': 461, 'value': 'Bank of America'}, {'count': 458, 'value': 'Asia'}, {'count': 453, 'value': 'Cowen'}, {'count': 447, 'value': '12-month'}, {'count': 416, 'value': 'UBS'}, {'count': 413, 'value': 'Jefferies Financial Group'}, {'count': 404, 'value': 'Wells Fargo'}, {'count': 388, 'value': '1-year'}, {'count': 387, 'value': '“Moderate Buy'}, {'count': 377, 'value': 'Latin America'}, {'count': 377, 'value': 'TheStreet'}, {'count': 371, 'value': '52-week'}, {'count': 360, 'value': 'Piper Sandler'}, {'count': 338, 'value': 'Truist'}, {'count': 338, 'value': 'Vanguard Group Inc'}, {'count': 306, 'value': 'JavaScript'}, {'count': 306, 'value': 'Raymond James'}, {'count': 288, 'value': 'State Street'}, {'count': 281, 'value': 'Stock Target Advisor'}, {'count': 264, 'value': 'Robert W. Baird'}, {'count': 258, 'value': 'See'}, {'count': 251, 'value': 'BlackRock Inc.'}, {'count': 248, 'value': 'Stifel Nicolaus'}, {'count': 241, 'value': 'GBX'}, {'count': 235, 'value': 'Directors'}, {'count': 229, 'value': 'GCB'}, {'count': 228, 'value': 'ICG'}, {'count': 228, 'value': 'Institutional Clients Group'}, {'count': 228, 'value': 'Oppenheimer'}, {'count': 227, 'value': 'Global Consumer Banking'}, {'count': 226, 'value': 'DPR'}, {'count': 221, 'value': 'Insider Buying'}, {'count': 210, 'value': 'BMO'}, {'count': 200, 'value': 'China'}, {'count': 197, 'value': 'US'}, {'count': 193, 'value': 'Featured Stories'}, {'count': 189, 'value': 'Medium'}, {'count': 187, 'value': 'Mizuho'}, {'count': 183, 'value': '1.58'}, {'count': 183, 'value': 'C – Get Rating'}, {'count': 172, 'value': 'StockNews.com'}, {'count': 171, 'value': 'Institutional'}, {'count': 170, 'value': 'U.S.'}, {'count': 168, 'value': 'Citi'}, {'count': 165, 'value': 'Canada'}, {'count': 164, 'value': 'KeyCorp'}, {'count': 155, 'value': 'Insider Activity'}, {'count': 155, 'value': 'MT Newswires'}, {'count': 153, 'value': 'MarketScreener'}, {'count': 152, 'value': 'United Kingdom'}, {'count': 147, 'value': 'ZoneBourse'}, {'count': 146, 'value': 'Berenberg Bank'}, {'count': 141, 'value': 'Americas'}, {'count': 141, 'value': 'Captrust Financial Advisors'}, {'count': 141, 'value': 'Reuters'}, {'count': 138, 'value': 'Mexico'}, {'count': 136, 'value': 'Asia Pacific'}, {'count': 136, 'value': 'Australia'}, {'count': 135, 'value': 'Benchmark'}, {'count': 133, 'value': 'New York'}, {'count': 131, 'value': 'BNP Paribas'}, {'count': 128, 'value': 'Course Objective Mean'}, {'count': 128, 'value': 'Institutional Inflows'}, {'count': 122, 'value': 'Wedbush'}, {'count': 118, 'value': 'INC'}, {'count': 117, 'value': 'Needham & Company LLC'}, {'count': 116, 'value': 'CWM LLC'}, {'count': 116, 'value': 'View The Five Stocks Here'}, {'count': 110, 'value': 'DA Davidson'}, {'count': 107, 'value': 'HoldingsChannel.com'}]}
We can visualize the output of the Trends endpoint as a wordcloud to help us quickly interpret the most prevalent keywords.
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
#convert data to dataframe for visualization
trends_data = pd.DataFrame(trends['trends'])
subset = trends_data[['value', 'count']]
tuples = [tuple(x) for x in subset.values]
# Custom Colormap
from matplotlib.colors import ListedColormap # use when indexing directly yo a colour map
word_colours = [
"#495B70" # aylien navy
, "#8BBE07" # aylien green
, "#7A98B7" # grey
, "#E77C05" # orange
, "#0796BE" # blue
, "#162542" # dark grey
]
# listed colour map
cmap = ListedColormap(word_colours)
wordcloud = WordCloud(background_color="white", width=800, height=400, colormap=cmap).generate_from_frequencies(dict(tuples))
plt.figure( figsize=(20,10) )
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()
We have used a wordcloud to invesitage the most prominent entities in a one month period, but what if we want to investigate the frequency of mentions over time?
We can loop over the Trends endpoint and create a timeseries to investigate the distribution of entities over time.
First we will create a function to create a list of tupples containing daily intervals to allow us to search for trends daily within a defined period.
# the time format we need to submit for News API queries
AYLIEN_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
def to_date(date):
if not isinstance(date, datetime.datetime):
date = str2date(date)
return date.strftime(AYLIEN_TIME_FORMAT)
def str2date(string):
return datetime.datetime.strptime(string, '%Y-%m-%d')
print('done')
def get_intervals(start_date, end_date):
start_date = str2date(start_date)
end_date = str2date(end_date)
return [(to_date(start_date + datetime.timedelta(days=d)),
to_date(start_date + datetime.timedelta(days=d + 1)))
for d in range((end_date - start_date).days)]
Next, we will define our date range, create a list of date tupples and iterate over those daily intervals to populate a dataframe that relates the entity, the number of times it was mentioned and the day the mentions occurred.
# define our daily intervals
today = datetime.date.today().strftime('%Y-%m-%d')
last_month = (datetime.date.today() - datetime.timedelta(days=30)).strftime('%Y-%m-%d')
day_intervals = get_intervals(last_month, today)
# create dataframe in the format we want
my_columns = ['count', 'value', 'published_at']
trends_data_frame = pd.DataFrame(columns = my_columns)
# define the query parameters
params = {
'title': 'Citigroup'
, 'field' : 'entities.body.surface_forms.text'
}
# define what trends we want to return
field = 'entities.body.surface_forms.text'
for day in tqdm(day_intervals):
# define time interval
params['published_at.start'] = day[0]
params['published_at.end'] = day[1]
api_response = get_trends(params)
#covert to dataframe
api_response = pd.DataFrame(api_response['trends'])
# add in a day label
api_response['published_at'] = params['published_at.start']
# add to global dataframe
trends_data_frame = trends_data_frame.append(api_response)
print("Completed")
Completed
We can loop over this dataframe and visualize the distribution of the different entities. Note, the code below visualizes only the top 10 entities.
# we will plot two subplots using the same axes
fig = make_subplots(rows=1, cols=1)
# identify the top ten entities
entities_total = trends_data_frame.groupby(['value'])['count'].agg('sum').reset_index().sort_values(by=['count'], ascending = False)
top_ten_entities = entities_total[0:10]['value'].unique()
# loop over postive and negative sentiment data to generate to line graphs
# start of for loop =======================================================================================
for entity in top_ten_entities:
# filter to the data we want to visualize based on sentiment
data = trends_data_frame[trends_data_frame['value'] == entity]
fig.append_trace(go.Scatter(
x = data['published_at']
, y = data['count']
, mode = 'lines'
, name = entity
)
, col = 1
, row = 1)
# end of for loop =======================================================================================
# forrmat the chart
fig.update_layout(
title='Trending Entities Over Time',
legend = dict(orientation = 'h', y = -0.1),
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, height=700
)
fig.show()
Naturally, multiple news stories will exist that report on the same or similar topics. AYLIEN's clustering enrichment groups stories together that typically correspond to real-world events or topics. Clusters are made of stories that exist close to one another in vector space and the clustering enrichment links clusters to a "representative story" that exists in the centre of each cluster — reading this representative story provides an indication of the general nature of the entire cluster.
Similar to the timeseries and Trends endpoints, clusters enable us to review stories over time and identify points of interest. We can search for individual clusters using a a cluster ID, but similar to stories, we will generally not know the IDs of interest before we find them. Consequently, we can search for clusters using the Trends endpoint. The Trends endpoint allows us to filter clusters based on the stories contained within the clusters.
The Trends endpoint returns the id of clusters sorted by the count of stories associated with them. Once we have each cluster’s id, you can go on to get the stories for each of the clusters from the Stories endpoint. The Trends endpoint only returns the top 100 clusters for a given query.
The following script identifies clusters of news that feature the Citigroup entitiy using the Trends endpoint and returns the top 3 stories in each cluster, ranked by Alexa ranking.
# define the query parameters
params = {
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW',
'aql': 'entities: {{surface_forms.text:"Citigroup" AND overall_prominence:[0.7 TO *]}}',
'field' : 'clusters'
}
cluster_ids = get_cluster_from_trends(params)
cluster_ids = pd.DataFrame(cluster_ids)
cluster_ids
{'aql': 'entities: {{surface_forms.text:"Citigroup" AND ' 'overall_prominence:[0.7 TO *]}}', 'field': 'clusters', 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'}
count | value | |
---|---|---|
0 | 72 | 403153946 |
1 | 46 | 412444374 |
2 | 42 | 404373226 |
3 | 38 | 406971372 |
4 | 36 | 402698173 |
... | ... | ... |
95 | 9 | 401457503 |
96 | 9 | 402491999 |
97 | 9 | 403191325 |
98 | 9 | 403481293 |
99 | 9 | 403944021 |
100 rows × 2 columns
clusters_output = []
for cluster_id in tqdm(cluster_ids['value'].unique()):
# get cluster
params = {'id[]' : [cluster_id]}
cluster = get_clusters(params)['clusters']
# get top alexa ranked stories associated with the cluster
stories = get_top_stories_in_cluster(cluster_id, 3)
cluster[0]['stories'] = stories
clusters_output.extend(cluster)
time.sleep(1)
print('Complete')
100%|██████████| 100/100 [04:42<00:00, 2.82s/it]
Complete
If we look at the first 3 clusters returned, we can see the number of stories associated with each cluster, the representative story title and the top 3 ranked stories.
for cluster in clusters_output[0:3]:
print('Cluster ID: ' + str(cluster['id']))
print('Story Count: ' + str(cluster['story_count']))
print('Representative Story Title: ' + str(cluster['representative_story']['title']))
print('Top ranked stories in cluster:')
for story in cluster['stories']:
indent_string = ' > '
print(indent_string + story['title'])
print('')
Cluster ID: 403153946 Story Count: 80 Representative Story Title: Citigroup to purchase Deutsche Bank's Mexico license Top ranked stories in cluster: > Citigroup to acquire Deutsche Bank's licence in Mexico > Citigroup to Buy Deutsche Bank’s License in Mexico - Bloomberg > Deutsche Bank expands support for Egypt’s sustainability ambitions Cluster ID: 412444374 Story Count: 53 Representative Story Title: Reguladores instan a Citigroup a corregir plan de quiebra Top ranked stories in cluster: > Agencies announce results of resolution plan review for largest and most complex domestic banks > Bank regulators identify shortcoming in Citigroup resolution plan > Bank regulators tell Citigroup to take urgent action to fix resolution plan Cluster ID: 404373226 Story Count: 63 Representative Story Title: Carrols Restaurant Group (NASDAQ:TAST) Price Target Cut to $4.00 by Analysts at Deutsche Bank Aktiengesellschaft Top ranked stories in cluster: > First Watch Restaurant Group (NASDAQ:FWRG) Price Target Cut to $22.00 > Barclays Raises First Watch Restaurant Group (NASDAQ:FWRG) Price Target to $20.00 > Payoneer Global (NASDAQ:PAYO) PT Raised to $10.00
We can easily visualize the cluster data to make it more easily digestable and understandable. Below we'll convert it to a Pandas dataframe and then visualize with Plotly.
# create dataframe in the format we want
my_columns = ['cluster_id', 'representative_story_title', 'top_story_title', 'published_at', 'story_count']
clusters_data_frame = pd.DataFrame(columns = my_columns)
for cluster in clusters_output:
data = [[
cluster['id']
# translate the stories to English where necessary
, return_translated_content(cluster['representative_story'], 'title')
, return_translated_content(cluster['stories'][0], 'title')
, cluster['representative_story']['published_at']
, cluster['story_count']
]]
data = pd.DataFrame(data, columns = my_columns)
clusters_data_frame = clusters_data_frame.append(data, sort=True)
clusters_data_frame['published_at'] = pd.to_datetime(clusters_data_frame['published_at'], utc = True)
pd.set_option('display.max_rows', 100)
clusters_data_frame = clusters_data_frame.sort_values(by=['story_count'], ascending = False).reset_index(0)
# convert story count to plotly friendly format
clusters_data_frame['story_count'] = clusters_data_frame['story_count'].astype(np.int64)
clusters_data_frame.head()
index | cluster_id | published_at | representative_story_title | story_count | top_story_title | |
---|---|---|---|---|---|---|
0 | 0 | 399596220 | 2022-11-01 03:52:48+00:00 | Goldman Sachs pense que la Fed va continuer à ... | 173 | The Fed will raise interest rates more aggress... |
1 | 0 | 406182731 | 2022-11-20 06:29:14+00:00 | StockNews.com Initiates Coverage on Airgain (N... | 157 | Inotiv (NASDAQ:NOTV) Stock Rating Lowered by T... |
2 | 0 | 409507057 | 2022-11-18 20:27:20+00:00 | Is Digital Dollar Coming Soon? | 133 | Cryptomonaries: A First Test of the Digital Do... |
3 | 0 | 410762310 | 2022-11-28 12:53:02+00:00 | JPMorgan Chase & Co. Boosts Cooper Companies (... | 126 | Citigroup Boosts Ross Stores (NASDAQ:ROST) Pri... |
4 | 0 | 409312077 | 2022-11-15 05:23:32+00:00 | Advance Auto Parts (NYSE:AAP) Upgraded to “Buy... | 116 | Advance Auto Parts (NYSE:AAP) PT Lowered to $1... |
# split title stings over multiple lines for legibility
split_title_string(clusters_data_frame, 'representative_story_title')
split_title_string(clusters_data_frame, 'top_story_title')
colours = {
'positive' : 'green'
, 'positive_opaque' : 'rgba(138, 190, 6, 0.05)'
, 'negative' : 'red'
, 'negative_opaque' : 'rgba(228, 42, 58, 0.05)'
, 'neutral' : 'rgb(40, 56, 78)'
, 'neutral_opaque' : 'rgba(40, 56, 78, 0.05)'
}
#biggest cluster size
big_cluster_size = 200
# calculate the factor by which we will mutlipy all clusters to fit them on the graph
factor = clusters_data_frame['story_count'].max()/big_cluster_size
fig = go.Figure(data=go.Scatter(
x=clusters_data_frame['published_at'],
y=clusters_data_frame['story_count'],
mode='markers',
marker=dict(
size=clusters_data_frame['story_count']/factor
, line = dict(width=2, color = colours['neutral'])
, color = colours['neutral' + '_opaque']
),
text = 'Index: ' + clusters_data_frame['index'].astype(str)
+ '<br>No. Stories:' + clusters_data_frame['story_count'].astype(str)
+ '<br><br>Rep Story: <br>'
+ clusters_data_frame['representative_story_title_string']
+ '<br>Top Story: <br>'
+ clusters_data_frame['top_story_title_string']
))
fig.update_layout(
height=700
)
# forrmat the chart
fig.update_layout(
title='Story Clusters Over Time',
legend = dict(orientation = 'h', y = -0.1),
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, height=700
)
fig.show()
Here we have given a quick introduction in how to get up and running with four of the AYLIEN News' API's most frequently used endpoints. With these code and visualization examples, you should be able to start exploring news data in no time!