Download Jupyter notebook here
In this document, we will review four of the AYLIEN News API's most commonly used endpoints:
We will utilise AYLIEN's Python SDK (Software Development Kit) and also show you some helpful code to start wrangling the data in Python using Pandas and visualizing it using Plotly.
As an exercise, we will focus on pulling news stories related to Citibank, to show how these different endpoints can be used in combination to investigate a topic of your choice.
Please note, comprehensive documentation on how to use the News API can be found here.
Here we will outline how to connect to AYLIEN's News API and define some useful functions to make pulling and analysing our data easier.
First things first — we need to connect to the News API. Make sure that you have installed the aylien_news_api library using pip. The code below demonstrates how to connect to the API and also imports some other libraries that will be useful later.
Don't forget to enter your API credentials in order to connect to the API! If you don't have any credentials yet, you can sign up for a free trial here.
from __future__ import print_function
# install packages if not installed already
# !pip install datetime
# !pip install pandas
# !pip install numpy
# !pip install plotly
# !pip install aylien_news_api
# !pip install chart_studio
# !pip install tqdm
# !pip install pprint
# !pip install wordcloud
import os
import requests
import datetime
from dateutil.tz import tzutc
import json
import time
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
from pprint import pprint
# for visualization
import plotly.graph_objs as go
import chart_studio.plotly as py
from plotly.subplots import make_subplots
headers = {
'X-AYLIEN-NewsAPI-Application-ID': 'ID',
'X-AYLIEN-NewsAPI-Application-Key': 'KEY'
}
print('Complete')
Complete
The Functions below will be used to pull the data from the API using get requests. In some cases, data will be returned as an array of objects e.g. the get_stories function. In others data will be returned as Pandas dataframes e.g. the get_timeseires function.
#=======================================================================================
def get_timeseries(params, print_params = None, print_count = None):
if print_params is None or print_params == 'yes':
pprint(params)
response = requests.get('https://api.aylien.com/news/time_series', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
#convert to dataframe
timeseries_data = pd.DataFrame(response['time_series'])
# convert back to datetime
timeseries_data['published_at'] = pd.to_datetime(timeseries_data['published_at'])
timeseries_data['published_at'] = timeseries_data['published_at'].dt.date
if print_count is None or print_count == 'yes':
print('Number of stories returned : ' + str(format(timeseries_data['count'].sum(), ",d")))
return timeseries_data
#=======================================================================================
def get_stories(params, print_params = None, print_count = None, print_story = None):
if print_params is None or print_params == 'yes':
pprint(params)
fetched_stories = []
stories = None
while stories is None or len(stories) > 0:
try:
response = requests.get('https://api.aylien.com/news/stories', params=params, headers=headers).json()
except Exception as e:
continue
if 'errors' in response or 'error' in response:
pprint(response)
stories = response['stories']
if len(stories) > 0:
print(stories[0]['title'])
print(stories[0]['links']['permalink'])
params['cursor'] = response['next_page_cursor']
fetched_stories += stories
if (print_story is None or print_story == 'yes') and len(stories) > 0:
pprint(stories[0]['title'])
if print_count is None or print_count == 'yes':
print("Fetched %d stories. Total story count so far: %d" %(len(stories), len(fetched_stories)))
return fetched_stories
#=======================================================================================
def get_top_ranked_stories(params, no_stories, print_params = None, print_count = None):
if print_params is None or print_params == 'yes':
pprint(params)
fetched_stories = []
stories = None
while stories is None or len(stories) > 0 and len(fetched_stories) < no_stories:
try:
response = requests.get('https://api.aylien.com/news/stories', params=params, headers=headers).json()
except Exception as e:
continue
if 'errors' in response or 'error' in response:
pprint(response)
stories = response['stories']
if len(stories) > 0:
print(stories[0]['title'])
print(stories[0]['links']['permalink'])
params['cursor'] = response['next_page_cursor']
fetched_stories += stories
if print_count is None or print_count == 'yes':
print("Fetched %d stories. Total story count so far: %d" %(len(stories), len(fetched_stories)))
return fetched_stories
#=======================================================================================
def get_clusters(params={}):
#pprint(params)
response = requests.get('https://api.aylien.com/news/clusters', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
return response
#=======================================================================================
# pull trends data to identify most frequently occuring entities or keywords
def get_trends(params={}):
#pprint(params)
response = requests.get('https://api.aylien.com/news/trends', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
return response
#=======================================================================================
def get_cluster_from_trends(params, print_params = None):
if print_params is None or print_params == 'yes':
pprint(params)
"""
Returns a list of up to 100 clusters that meet the parameters set out.
"""
response = requests.get('https://api.aylien.com/news/trends', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
if len(response) > 0:
return response["trends"]
#=======================================================================================
# identify the top ranked story per cluster
def get_top_stories_in_cluster(cluster_id, no_stories):
top_story_params = {
'clusters[]' : [cluster_id]
, 'sort_by' : "source.rankings.alexa.rank"
, 'per_page' : no_stories
, 'return[]' : ['id', 'language', 'links', 'title', 'source', 'translations', 'clusters', 'published_at']
}
response = requests.get('https://api.aylien.com/news/stories', params=top_story_params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
if len(response["stories"]) > 0:
return response["stories"]
else:
return None
#=======================================================================================
# helper endpoint that takes a string of characters and an entity type (such as sources, or DBpedia entities) and returns matching entities of the specified type along with additional metadata
# params = {'type' : 'source_names', 'term' : 'Times of India' }
def autocompletes(params={}):
pprint(params)
"""
Returns a list of up to 100 clusters that meet the parameters set out.
"""
response = requests.get('https://api.aylien.com/news/autocompletes', params=params, headers=headers).json()
if 'errors' in response or 'error' in response:
pprint(response)
pprint(response)
These other functions will help us format data as necessary.
# return transalted title or body of a story (specify in params)
def return_translated_content(story_x, text_x):
if 'translations' in story_x:
return story_x['translations']['en'][text_x]
else:
return story_x[text_x]
# create smaller lists from big lists
def chunks(lst, n):
return list(lst[i:i + n] for i in range(0, len(lst), n))
#=======================================================================================
# split title string over multiple lines for legibility on graph
def split_title_string(dataframe_x, column_x):
title_strings = []
for index, row in dataframe_x.iterrows():
word_array = row[column_x].split()
counter = 0
string = ''
for word in word_array:
if counter == 7:
string += (word + '<br>')
counter = 0
else:
string += (word + ' ')
counter += 1
title_strings.append(string)
dataframe_x[column_x + '_string'] = (title_strings)
#=======================================================================================
def print_keyword_mention(story_x, element_x, keyword_x):
body_x = story[element_x]
if 'translations' in story and story['translations'] is not None and 'en' in story['translations']:
body_x = story['translations']['en'][element_x]
# extract a window around key entity
e_idx = body_x.find(keyword_x)
e_end = e_idx + len(keyword_x)
if e_idx >= 0:
e_str = body_x[e_idx-100:e_idx] + "\033[1m" + body_x[e_idx:e_end] + "\033[0m " + body_x[e_end+1:e_end+51]
print(f'{e_str}')
elif element_x == 'title':
print(story['title'])
#=======================================================================================
def print_entities(story_x, element_x = None, surface_form_x = None, version_x = None):
element = ''
if element_x is None or element_x == 'body':
element = 'body'
else:
element = 'title'
# if no surface_form
if surface_form_x is None:
for entity in story_x['entities']:
pprint(entity)
else:
for entity in story_x['entities']:
x = 0
for surface_form in entity[element_x]['surface_forms']:
if surface_form_x.lower() in surface_form['text'].lower():
x = 1
if x != 0:
pprint(entity)
The most granular data point we can extract from the News API is a story; all other endpoints are aggregations or extrapolations of stories. Stories are basically news articles that have been enriched using AYLIEN's machine learning prcoess. We will learn more about this enrichment later.
For now we will pull one story published in English in the last hour.
# define parameters
params = {
'published_at.start': 'NOW-1HOUR'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'per_page' : 1
}
stories = get_top_ranked_stories(params, 1)
print()
pprint(stories)
{'language[]': ['en'], 'per_page': 1, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1HOUR'} USA TODAY Sports Josh McDaniels on Raiders' Resiliency in OT Win Over Seattle Originally posted on FanNation Raider Maven By Aidan Champion | Last updated 11/28/22 https://www.yardbarker.com/nfl/articles/josh_mcdaniels_on_raiders_resiliency_in_ot_win_over_seattle/s1_16640_38177445 Fetched 1 stories. Total story count so far: 1 [{'author': {'id': 27406279, 'name': 'Aidan Champion'}, 'body': 'For the second week in a row, the Las Vegas Raiders found the will ' 'to win in overtime on the road.\n' ' A week after defeating the Denver Broncos on a walk-off play in ' 'OT, the Raiders did the same against a solid Seattle Seahawks ' 'team. "I think our team is obviously learning how to be ' 'resilient," Raiders coach Josh McDaniels said in his postgame press ' 'conference Sunday. "And give Seattle a lot of credit. This is a ' "good football team, they're well coached like we thought they would " 'be. Pete [Carroll] does a great job, and they gave us some fits on ' 'some things and made some adjustments and we had to make some ' 'adjustments and it was a very interesting game in that regard. But ' 'I thought our guys were tough." Sunday\'s game was a sequence of ' 'ups and downs, with the Raiders even falling behind by a touchdown ' 'with just over 5 and half minutes remaining in regulation. "You ' 'got to focus on the next drive, the next sequence, the next group ' 'that\'s going to go out there on the field," McDaniels said. "I ' "mean, it started from the first play to the last play. First play's " "an interception and the last play's a touchdown. There was a lot of " 'swings, and I credit our coaches. Our coaches did a really good job ' 'of staying neutral at times when they needed to be and trying to ' 'fix the problems if there were any and address those without having ' 'a bunch of emotion in it." As promising as the back-to-back ' 'victories have been for the Silver and Black, McDaniels has always ' 'felt optimistic his team was heading in the right direction. ' '"I\'ve never doubted that it was," he said. "And like I said, the ' "NFL, there's a lot of close games every week, and sometimes it " 'takes a little while to learn how to get over the hump on some of ' "those things, and that's what we attribute it to. Doesn't guarantee " "us anything going forward. We're going to stick with our process, " 'we think we have a really close-knit group here that works hard, we ' "believe in what we're doing, we believe in what we're coaching, we " "believe in trying to win the way we're trying to win. And I think " 'our guys do, too." This article first appeared on FanNation Raider ' 'Maven and was syndicated with permission. More must-reads:', 'categories': [{'id': 'IAB17', 'label': 'Sports', 'links': {'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17'}, 'score': 0.33, 'taxonomy': 'iab-qag'}, {'id': 'IAB17-12', 'label': 'Football', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iab-qag/IAB17-12'}, 'score': 0.24, 'taxonomy': 'iab-qag'}, {'id': '15003000', 'label': 'American football', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/iptc-subjectcode/15000000'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/iptc-subjectcode/15003000'}, 'score': 0.54, 'taxonomy': 'iptc-subjectcode'}, {'id': 'ay.lifesoc.prosport', 'label': 'Professional Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.prosport'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports', 'label': 'Sports', 'links': {'parents': [], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.football', 'label': 'Football (American)', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.team'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.football'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.nfl', 'label': 'National Football League', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.prosport', 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.football'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.nfl'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.sports.team', 'label': 'Team Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports.team'}, 'score': 1, 'taxonomy': 'aylien'}, {'id': 'ay.lifesoc.gensport', 'label': 'General Sports', 'links': {'parents': ['https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.sports'], 'self': 'https://api.aylien.com/api/v1/classify/taxonomy/aylien/ay.lifesoc.gensport'}, 'score': 0.9, 'taxonomy': 'aylien'}], 'characters_count': 2187, 'clusters': [409119966], 'entities': [{'body': {'sentiment': {'confidence': 0.59, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 387, 'start': 380}, 'sentiment': {'confidence': 0.59, 'polarity': 'neutral'}}], 'text': 'Seattle'}]}, 'external_ids': {}, 'id': 'Q5083', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q5083', 'wikipedia': 'https://en.wikipedia.org/wiki/Seattle'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.74, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 80, 'start': 73}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Seattle'}]}, 'types': ['Local_government', 'Corporation', 'Location', 'Political_organisation', 'City', 'Government', 'Community', 'Company', 'Organization']}, {'body': {'sentiment': {'confidence': 0.74, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 323, 'start': 309}, 'sentiment': {'confidence': 0.74, 'polarity': 'positive'}}], 'text': 'Josh McDaniels'}]}, 'external_ids': {}, 'id': 'Q3810320', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q3810320', 'wikipedia': 'https://en.wikipedia.org/wiki/Josh_McDaniels'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 34, 'start': 20}, 'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}}], 'text': 'Josh McDaniels'}]}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 3, 'mentions': [{'index': {'end': 179, 'start': 172}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}, {'index': {'end': 302, 'start': 295}, 'sentiment': {'confidence': 0.67, 'polarity': 'positive'}}, {'index': {'end': 775, 'start': 768}, 'sentiment': {'confidence': 0.59, 'polarity': 'negative'}}], 'text': 'Raiders'}]}, 'external_ids': {}, 'id': 'Q5870124', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q5870124', 'wikipedia': 'https://en.wikipedia.org/wiki/History_of_the_Oakland_Raiders'}, 'overall_frequency': 4, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.82, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.91, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 45, 'start': 38}, 'sentiment': {'confidence': 0.91, 'polarity': 'neutral'}}], 'text': 'Raiders'}]}, 'types': []}, {'body': {'sentiment': {'confidence': 0.79, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 2134, 'start': 2112}, 'sentiment': {'confidence': 0.79, 'polarity': 'neutral'}}], 'text': 'FanNation Raider Maven'}]}, 'external_ids': {}, 'id': 'N186086181726508417844685281276398801348', 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.85, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.9, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 132, 'start': 110}, 'sentiment': {'confidence': 0.9, 'polarity': 'neutral'}}], 'text': 'FanNation Raider ' 'Maven'}]}, 'types': ['Location']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N130591893304718568511464573285032572817', 'overall_frequency': 1, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 16, 'start': 0}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'USA TODAY Sports'}]}, 'types': ['Organization']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N223547351047399335585713261784924445595', 'overall_frequency': 1, 'overall_prominence': 0.91, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 72, 'start': 64}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Win Over'}]}, 'types': ['Location']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'Q132148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q132148', 'wikipedia': 'https://en.wikipedia.org/wiki/Aidan_of_Lindisfarne'}, 'overall_frequency': 1, 'overall_prominence': 0.8, 'overall_sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 144, 'start': 139}, 'sentiment': {'confidence': 0.89, 'polarity': 'neutral'}}], 'text': 'Aidan'}]}, 'types': ['Human']}, {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N283599043316305970941549218810195124075', 'overall_frequency': 1, 'overall_prominence': 0.76, 'overall_sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 174, 'start': 170}, 'sentiment': {'confidence': 0.88, 'polarity': 'neutral'}}], 'text': 'Last'}]}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 51, 'start': 34}, 'sentiment': {'confidence': 0.72, 'polarity': 'positive'}}], 'text': 'Las Vegas Raiders'}]}, 'external_ids': {}, 'id': 'Q324523', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q324523', 'wikipedia': 'https://en.wikipedia.org/wiki/Las_Vegas_Raiders'}, 'overall_frequency': 1, 'overall_prominence': 0.68, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 141, 'start': 127}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}], 'text': 'Denver Broncos'}]}, 'external_ids': {}, 'id': 'Q223507', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q223507', 'wikipedia': 'https://en.wikipedia.org/wiki/Denver_Broncos'}, 'overall_frequency': 1, 'overall_prominence': 0.55, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.75, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 166, 'start': 164}, 'sentiment': {'confidence': 0.75, 'polarity': 'neutral'}}], 'text': 'OT'}]}, 'external_ids': {}, 'id': 'Q186982', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q186982', 'wikipedia': 'https://en.wikipedia.org/wiki/Overtime_(sports)'}, 'overall_frequency': 1, 'overall_prominence': 0.49, 'overall_sentiment': {'confidence': 0.75, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': []}, {'body': {'sentiment': {'confidence': 0.58, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 225, 'start': 209}, 'sentiment': {'confidence': 0.58, 'polarity': 'neutral'}}], 'text': 'Seattle Seahawks'}]}, 'external_ids': {}, 'id': 'Q221878', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q221878', 'wikipedia': 'https://en.wikipedia.org/wiki/Seattle_Seahawks'}, 'overall_frequency': 1, 'overall_prominence': 0.43, 'overall_sentiment': {'confidence': 0.58, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Nonprofit_organization', 'Organization']}, {'body': {'sentiment': {'confidence': 0.91, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 2, 'mentions': [{'index': {'end': 995, 'start': 986}, 'sentiment': {'confidence': 0.83, 'polarity': 'neutral'}}, {'index': {'end': 1458, 'start': 1449}, 'sentiment': {'confidence': 0.91, 'polarity': 'positive'}}], 'text': 'McDaniels'}]}, 'external_ids': {}, 'id': 'Q16846249', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q16846249', 'wikipedia': 'https://en.wikipedia.org/wiki/K._J._McDaniels'}, 'overall_frequency': 2, 'overall_prominence': 0.34, 'overall_sentiment': {'confidence': 0.91, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.97, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 491, 'start': 487}, 'sentiment': {'confidence': 0.97, 'polarity': 'positive'}}], 'text': 'Pete'}]}, 'external_ids': {}, 'id': 'N334899751825691118615243936337416130988', 'overall_frequency': 1, 'overall_prominence': 0.03, 'overall_sentiment': {'confidence': 0.97, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.9, 'polarity': 'positive'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 1447, 'start': 1442}, 'sentiment': {'confidence': 0.9, 'polarity': 'positive'}}], 'text': 'Black'}]}, 'external_ids': {}, 'id': 'N309782724290245396668082628620805636318', 'overall_frequency': 1, 'overall_prominence': 0.02, 'overall_sentiment': {'confidence': 0.9, 'polarity': 'positive'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Human']}, {'body': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 1600, 'start': 1597}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'NFL'}]}, 'external_ids': {}, 'id': 'Q1215884', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q1215884', 'wikipedia': 'https://en.wikipedia.org/wiki/National_Football_League'}, 'overall_frequency': 1, 'overall_prominence': 0.02, 'overall_sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']}], 'hashtags': ['#Seattle', '#JoshMcDaniels', '#WalkoffHomeRun', '#USAToday', '#Touchdown', '#SeattleSeahawks', '#Overtime', '#OaklandRaiders', '#NationalFootballLeague', '#NFL', '#MavenHuffman', '#Interception', '#Emotion', '#DenverBroncos', '#BroadcastSyndication', '#AssociationFootball'], 'id': 5079455953, 'industries': [], 'keywords': ['things', 'OT', 'Seattle Seahawks', 'week', 'Aidan Champion', 'Seattle', 'Maven', 'team', 'play', 'emotion', 'touchdown', 'Josh McDaniels', 'Win Over', 'overtime', 'Black', 'interception', 'walk-off', 'Pete', 'syndicated', 'McDaniels', 'Last', 'Raiders', 'USA TODAY', 'Denver Broncos', 'NFL', 'football', 'Sunday'], 'language': 'en', 'license_type': 0, 'links': {'clusters': '/stories?clusters[]=409119966', 'permalink': 'https://www.yardbarker.com/nfl/articles/josh_mcdaniels_on_raiders_resiliency_in_ot_win_over_seattle/s1_16640_38177445', 'related_stories': '/related_stories?story_id=5079455953'}, 'media': [{'format': 'JPEG', 'height': 900, 'type': 'image', 'url': 'https://www.yardbarker.com/media/7/6/76d5488c5ae0f0dd9b411351871bdc7a7b623a6b/thumb_16x9/usatsi_19517121_168390101_lowres.jpg?v=1', 'width': 1600}], 'paragraphs_count': 2, 'published_at': '2022-11-28T16:58:47Z', 'sentences_count': 22, 'sentiment': {'body': {'polarity': 'positive', 'score': 0.6}, 'title': {'polarity': 'neutral', 'score': 0.75}}, 'source': {'domain': 'yardbarker.com', 'home_page_url': 'https://www.yardbarker.com/', 'id': 117069, 'locations': [{'country': 'US'}], 'logo_url': '', 'name': 'Yardbarker', 'scopes': []}, 'summary': {'sentences': ['For the second week in a row, the Las Vegas ' 'Raiders found the will to win in overtime on the ' 'road.\n' ' ', 'A week after defeating the Denver Broncos on a ' 'walk-off play in OT, the Raiders did the same ' 'against a solid Seattle Seahawks team. ', '"I think our team is obviously learning how to be ' 'resilient," Raiders coach Josh McDaniels said in ' 'his postgame press conference Sunday.', '"And like I said, the NFL, there\'s a lot of ' 'close games every week, and sometimes it takes a ' 'little while to learn how to get over the hump on ' "some of those things, and that's what we " 'attribute it to.', "Sunday's game was a sequence of ups and downs, " 'with the Raiders even falling behind by a ' 'touchdown with just over 5 and half minutes ' 'remaining in regulation. ']}, 'title': "USA TODAY Sports Josh McDaniels on Raiders' Resiliency in OT " 'Win Over Seattle Originally posted on FanNation Raider ' 'Maven By Aidan Champion \t\t\t\t\t\t\xa0|\xa0 \t\t\t\t\tLast ' 'updated 11/28/22', 'words_count': 420}]
We can see that the story output is a list with one dictionary object representing the story we queried. The story object inlcudes the title, body text, summary sentences and lots of other contextual information that has been made available via AYLIEN's enrichment process.
We can loop through the object's key names to give us a flavour of what is available.
for key in stories[0]:
print(key)
author body categories industries characters_count clusters entities hashtags id keywords language links media paragraphs_count published_at sentences_count sentiment source summary title words_count license_type
Using a keyword search, we can search the AYLIEN database for words that appear in the title or body of an article. Here we will search for "Citigroup" in the title.
We will also limit the the date range — if we don't, we could return thousands of stories that feature "Citigroup" in the title — and define the language as English ("en"). Defining the language not only limits our output to English language content, it also allows the query to to remove any relevant stopwords. Learn about stopwords here.
We will also introduce the cursor. We don't know how many stories we'll get, and the cursor will allow us to scan through results. Learn more about using the cursor here.
The per_page parameter defines how many stories are returned for each API call, with 100 being the max.
The default parameters below will use relative times to ensure you can access recent news data (historical data is restricted). You can try changing the time periods by altering the paramters using the following formats:
# define the query parameters
params = {
'language[]': ['en'],
'title': 'Citigroup',
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
stories = get_stories(params)
print('************')
print("Fetched %s stories" %(len(stories)))
{'cursor': '*', 'language[]': ['en'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'title': 'Citigroup'} BUZZ-Live Nation rises as Citigroup lifts rating to 'buy' https://www.swissquote.ch/sqi_premium/market/news/News.action?id=14986092 "BUZZ-Live Nation rises as Citigroup lifts rating to 'buy'" Fetched 50 stories. Total story count so far: 50 Caribou Biosciences (NASDAQ:CRBU) Given New $37.00 Price Target at Citigroup https://dakotafinancialnews.com/2022/11/27/caribou-biosciences-nasdaqcrbu-given-new-37-00-price-target-at-citigroup.html 'Caribou Biosciences (NASDAQ:CRBU) Given New $37.00 Price Target at Citigroup' Fetched 50 stories. Total story count so far: 100 Northern Oil and Gas (NYSE:NOG) PT Raised to $46.00 at Citigroup https://baseballnewssource.com/2022/11/27/northern-oil-and-gas-nysenog-pt-raised-to-46-00-at-citigroup/7852403.html 'Northern Oil and Gas (NYSE:NOG) PT Raised to $46.00 at Citigroup' Fetched 7 stories. Total story count so far: 107 Fetched 0 stories. Total story count so far: 107 ************ Fetched 107 stories
Depending on what parameters you used (and of course, how much Citgroup featured in the news), your number of stories may vary. Let's print the first 10 titles to get a feel for the stories we have pulled.
for story in stories[0:10]:
print(story['id'])
print(story['title'])
print('')
5079435568 BUZZ-Live Nation rises as Citigroup lifts rating to 'buy' 5079419698 Citigroup Inc. (NYSE: C) Is Rated A Buy By Analysts. 5079358521 Ensign Peak Advisors Inc Has $57.29 Million Stock Holdings in Citigroup Inc. (NYSE:C) 5079233045 Citigroup Trims Galera Therapeutics (NASDAQ:GRTX) Target Price to $18.00 5079229312 Citigroup Upgrades Live Nation Entertainment (NYSE:LYV) to “Buy” 5079207000 Citigroup Upgrades Live Nation Entertainment (NYSE:LYV) to “Buy” 5079202754 MeridianLink (NYSE:MLNK) Price Target Lowered to $16.00 at Citigroup 5079174029 NuCana (NASDAQ:NCNA) PT Lowered to $2.00 at Citigroup 5079172560 Citigroup Raises Five Below (NASDAQ:FIVE) Price Target to $186.00 5079171435 MeridianLink (NYSE:MLNK) Price Target Lowered to $16.00 at Citigroup
What if we want to refine our keyword search further? We can create more complicated searches using Boolean statements. For instance, if we were interested in searching for news that mentioned Citigroup or Bank of America and that also mentioned "shares" but not "sell", we could write the following query. It is important to note here that the "Bank of America" search term is wrapped in double quotes — if it wasn't, each individual word would be treated as an indivudal search term, but we want to search for the full phrase.
# define the query parameters
params = {
'language[]': ['en'],
'title': '("Citigroup" OR "Bank of America" ) AND "shares" NOT "sell"',
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
stories = get_stories(params)
print('************')
print("Fetched %s stories" %(len(stories)))
print('************')
for story in stories:
print(story['title'])
print('')
{'cursor': '*', 'language[]': ['en'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'title': '("Citigroup" OR "Bank of America" ) AND "shares" NOT "sell"'} Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase https://www.informnny.com/news/business/press-releases/cision/20221128NY47953/bank-of-america-corporation-announces-hypothetical-accrued-dividends-and-hypothetical-total-consideration-for-libor-depositary-shares-sought-in-its-cash-tender-offers-and-amendments-to-the-offer-to-pu/ ('Bank of America Corporation Announces Hypothetical Accrued Dividends and ' 'Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its ' 'Cash Tender Offers and Amendments to the Offer to Purchase') Fetched 28 stories. Total story count so far: 28 Fetched 0 stories. Total story count so far: 28 ************ Fetched 28 stories ************ Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer t... Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase BRIEF-Bank Of America Corporation Announces Hypothetical Accrued Dividends And Hypothetical Total Consideration For LIBOR Depositary Shares Sought In Its Cash Tender Offers And Amendments To Offer To Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Bank of America Corporation Announces Hypothetical Accrued Dividends and Hypothetical Total Consideration for LIBOR Depositary Shares Sought in its Cash Tender Offers and Amendments to the Offer to Purchase Strategic Blueprint LLC Acquires 447 Shares of Bank of America Co. (NYSE:BAC) Aramco unit hires HSBC, Citigroup for Riyadh share sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Westover Capital Advisors LLC Acquires New Shares in Bank of America Co. (NYSE:BAC) Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Saudi Aramco Base Oil hires HSBC, Citigroup for Riyadh share sale Aramco Unit Hires HSBC, Citigroup for $1 Billion Share Sale Aramco Unit Hires HSBC, Citigroup for $1 Billion Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale (1) Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Aramco Unit Hires HSBC, Citigroup for Riyadh Share Sale Eubel Brady & Suttman Asset Management Inc. Purchases 3,906 Shares of Citigroup Inc. (NYSE:C) Bank of America Co. (NYSE:BAC) Shares Sold by Robertson Stephens Wealth Management LLC Eubel Brady & Suttman Asset Management Inc. Buys 3,906 Shares of Citigroup Inc. (NYSE:C)
We can see that we can refine our query by adding Boolean operators to our keyword search. However, this can become more complicated if we want to cast our net wider. For instance, let's say we want to pull stories about the banking sector in general. Rather than writing a complicated keyword search, we can search by a news category.
AYLIEN'S NLP enrichment classifies stories into categories to allow us to make more powerful searches. Our classifier is capable of classifying content into two taxonomies where a code corresponds with a a subject. Learn more here.
Here, we will search for all stories classified as "banking" (04006002) using the IPTC subject taxonomy. You can search for other IPTC codes here.
Many stories will be categorised under "banking", so we will restrict our output to the first 100.
We can also perform categorial search using the IAB taxonomy or the AYLIEN Smart Tagger which will be discussed later.
# define the query parameters
params = {
'language': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'categories.taxonomy[]': 'iptc-subjectcode',
'categories.id[]': ['04006002'],
'cursor': '*',
'per_page' : 10
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print('')
{'language': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'categories.taxonomy[]': 'iptc-subjectcode', 'categories.id[]': ['04006002'], 'cursor': '*', 'per_page': 10} {'categories.id[]': ['04006002'], 'categories.taxonomy[]': 'iptc-subjectcode', 'cursor': '*', 'language': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS'} Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago https://journaltimes.com/lifestyles/health-med-fit/wall-street-slips-as-lockdown-protests-spread-in-china/article_81621f36-0538-520e-9234-9e04289ca9ab.html Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago 'The Bank of Canada Still Has Your Back, But It's Got a Knife In It': Experts Weigh In On Market Future Teton Advisors Inc. Lowers Holdings in Value Line, Inc. (NASDAQ:VALU) Safra New York Corporation To Acquire Delta National Bank and Trust The Bank-Run Phenomenon 684. News: Daylight builds for the LGBTQ+ community and the FCA hits back at trading apps Safra New York Corporation To Acquire Delta National Bank and Trust Nigerian man flaunts over N1m saved in his piggy bank after he stopped doing 9k weekly data sub Get £175 for switching to Halifax…but there's a catch Keith Ligori
You may find you want to sort your query response by some metric. In the examples above, we have taken the top N stories.
These have been sorted - by default - by published date i.e. we are getting the most recent N stories that meet our search criteria.
Sorting the query response is particularly useful when many stories meet our search criteria but we only want N stories. For example, say 1,000 stories met our search criteria - we could sort these stories by a range of metrics and return the top N.
We can use the following paramters to sort our response by:
You can read more about sorting in our docs.
The sort order by default is descending, but we can explictly state which direction we want to sort by using the 'sort_by' parameter.
In the following example, we perform a keyword search and sort by keyword relevance.
params = {
'language': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'text' : 'Microsoft AND (merge OR acquire)',
'cursor': '*',
'per_page' : 10,
'sort_by' : 'relevance'
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print('')
{'language': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'text': 'Microsoft AND (merge OR acquire)', 'cursor': '*', 'per_page': 10, 'sort_by': 'relevance'} {'cursor': '*', 'language': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS', 'sort_by': 'relevance', 'text': 'Microsoft AND (merge OR acquire)'} In transaction documents between Microsoft COR and Activision Blizzard, Inc date of an exit of The Elder Scrolls 6] was foun https://news.myseldon.com/en/news/index/275465595 Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories In transaction documents between Microsoft COR and Activision Blizzard, Inc date of an exit of The Elder Scrolls 6] was foun New Microsoft partnership to drive technical growth at MTN Group Autonomy Orders 2,500 VinFast VF 8 And VF 9 Electric Cars Microsoft Reported To Extend Call Of Duty Multiplatform Release To PlayStation For Ten Years Where to get a Choice Specs in Pokémon Scarlet and Violet How to get a Choice Specs in Pokémon Scarlet and Violet Sony wanted to bring PlayStation Plus to Xbox, but Microsoft “wouldn't let it happen,” says SIE Joe Jonas urges people to 'check in' with themselves and their friends 6 biggest deal reports this week: Manchester United open to selling the club By Finally! Microsoft Reveals Why It Prefers Elder Scrolls 6 as Xbox Activision Blizzard, Inc and Microsoft COR accused of arrangement and falsification of the transaction on merge for $69 billion
The AYLIEN Query Language (or AQL), is AYLIEN's custom 'flavour' of the Lucene syntax that enables users to make more powerful queries on our data.
Queries in this syntax are made within an 'aql' parameter.
AQL enables us to perform more sophisticated searches like boosting the importance of keywords and enhanced entity search.
When making a query with many keywords, sometimes one keyword in is more important to your search than others. Boosting enables you to add weight to the more important keyword/keywords so that results mentioning these keywords are given a “boost” to get them higher in the results order.
For example, searching ["John", "Frank", "Sarah"] gives equal weight to each term, but ["John", "Frank"^2, "Sarah"] is like saying a mention of “Frank” is twice as important as a mention of “John” or “Sarah”. Stories mentioning “Frank” will therefore appear higher in the rank of search results. We can reduce the importance of a keyword by attributing a decimal number e.g. 0.5.
Boosting is not the definitive keyword search input, simply allows the user to specify the preponderant keywords in a list (i.e. if a story contains many mentions of non-boosted searched keywords, it could still be returned ahead of many stories that mention a boosted keyword). Boosting therefore does not exclude stories from the results, it only affects the order of returned results.
The boost is allocated using the ^ symbol.
In the example below, we search for a wide variety of keywords but give special significance to the "radioactive" keyword.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'title:(("toxic" "chemical" "industrial" "radioactive"^10 "sewerage") AND ("spill" "leak" "dump" "disaster" "contaminate" "waste" "pollute"))'
, 'sort_by' : 'relevance'
}
stories = get_top_ranked_stories(params, 10)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
{'aql': 'title:(("toxic" "chemical" "industrial" "radioactive"^10 "sewerage") ' 'AND ("spill" "leak" "dump" "disaster" "contaminate" "waste" ' '"pollute"))', 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH', 'sort_by': 'relevance'} Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005779051 Fetched 10 stories. Total story count so far: 10 ############# Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005779051 Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005772026 Radioactive Waste Missouri School https://n.news.naver.com/mnews/article/077/0005772025 Norway-Kaupanger: Radioactive-, toxic-, medical- and hazardous waste services https://ted.europa.eu/udl?uri=TED:NOTICE:656593-2022:TEXT:EN:HTML State must stop plan to dump radioactive water Ukraine shelled radioactive waste storage – official https://www.rt.com/russia/566874-zaporozhye-waste-storage-shelled/?utm_source=rss&utm_medium=rss&utm_campaign=RSS UN NUCLEAR CHIEF DECLARES RADIOACTIVE WASTE RECYCLING DIFFICULT Chinese Radiation Protection Res Institute Seeks Patent for Radioactive Waste Resin Dehydration Metering Feeding Device EDF says radioactive leak at Civaux reactor not due to... https://www.dailymail.co.uk/wires/reuters/article-11403041/EDF-says-radioactive-leak-Civaux-reactor-not-welding.html?ns_mchannel=rss&ns_campaign=1490&ito=1490 Chinese Radiation Protection Res Institute Submits Chinese Patent Application for Radioactive Waste Resin Wet Oxidation Device
Frequently, keywords of interest to us are mentioned in varying sequences of terms. For example, HSBC's division in China could appear in multiple forms: “HSBC China”, “HSBC’s branches in China”, “In China, HSBC is introducing new…” , etc.
Proximity search is a feature that enables user to broaden the search criteria to return these combinations. “Proximity” refers to the distance, in terms, between two searched terms in a story. For example, "HSBC China"~5 only returns stories that mention "HSBC" and "China", where there is a maximum of four words in between them.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
#, 'body': 'HSBC AND China'
, 'body': '"HSBC China"~4'
, 'sort_by' : 'relevance'
, 'language[]' : ['en']
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
keywords = ["HSBC"]
for story in stories:
print('##################')
print(story['published_at'])
print(story['id'])
print(story['title'])
print(story['words_count'])
print(story['links']['permalink'])
for item in keywords:
print('Keyword mention:')
print_keyword_mention(story, 'body', item)
print()
{'body': '"HSBC China"~4', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS', 'sort_by': 'relevance'} Mercedes-Benz becomes 1st MNC to issue green Panda bond in China http://www.shine.cn/biz/finance/2211283456/ Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T13:26:26Z 5079173598 Mercedes-Benz becomes 1st MNC to issue green Panda bond in China 330 http://www.shine.cn/biz/finance/2211283456/ Keyword mention: HSBC announced it has helped Mercedes-Benz to issue 500 ################## 2022-11-28T02:35:04Z 5078530062 HSBC's smart supply chain breaks the circle again to empower the digital future 1620 https://www.tellerreport.com/business/2022-11-28-hsbc-s-smart-supply-chain-breaks-the-circle-again-to-empower-the-digital-future.HyVx3aq-Po.html Keyword mention: reasonable growth of the quantity" is the goal of the future supply chain development. Among them, HSBC China won the regional awards of "Best Digital Tra ################## 2022-11-28T15:50:50Z 5079379311 October 31 Asia bond pipeline: What's coming up? 1277 Keyword mention: (Asia), CMB Wing Lung, Citi, CMBC, CEB, CTBC, China PA Securities, Guotai Junan, Guosen Securities, HSBC Huatai Intl, Haitong Intl, ICBC (Asia), Industria ################## 2022-11-28T15:54:58Z 5079383946 October 27 Asia bond pipeline: What's coming up? 1251 Keyword mention: Price to be set by Dutch auction | Tender deadline November 3 HSBC (Dealer manager) | Kroll Issuer Services (Tender a ################## 2022-11-28T15:52:27Z 5079381149 October 28 Asia bond pipeline: What's coming up? 1303 Keyword mention: (Asia), CMB Wing Lung, Citi, CMBC, CEB, CTBC, China PA Securities, Guotai Junan, Guosen Securities, HSBC Huatai Intl, Haitong Intl, ICBC (Asia), Industria
AYLIEN leverages two industry standard taxonomies in our news categorisation but we also leverage our own propriertary taxonomy - the Smart Tagger.
Smart Tagger leverages state-of-the-art classification models that have been built using a vast collection of manually tagged news articles based on domain-specific industry and topical taxonomies. Smart Tagger uses a highly effective rule-based classification system for identifying categorical and industry-related news content.
As part of the Smart Tagger update we’re introducing 2 new classification taxonomies; the AYLIEN Industry Taxonomy and the AYLIEN Category Taxonomy, which incorporates 2 curated category groupings; Adverse Events and Trading Impact Events.
You can explore these taxonomies here.
A wide and deep collection of topical categories covering popular topics specifically curated for the business and finance world.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:"Environmental, Social and Governance"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:"Environmental, Social and ' 'Governance"}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot http://activerain.com/blogsview/5760941/nov-2022-wisconsin-forestland-sold-report-vernon-county--hunting--timber--investments--market-snapshot#article-comments-section Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot ################## 2022-11-28T16:58:33Z Residents urged to donate to Christmas clothing drive ################## 2022-11-28T16:58:12Z COP27 climate alarmists see oil demand hitting 18-year highs ################## 2022-11-28T16:57:50Z Tata Communications and Intertec Systems expand partnership, set up Cyber Security Operations Centre in UAE ################## 2022-11-28T16:57:18Z City of Houston Is Under a Water Boil Advisory, Affecting Millions
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND id:ay.lifesoc.esg}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND id:ay.lifesoc.esg}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot http://activerain.com/blogsview/5760941/nov-2022-wisconsin-forestland-sold-report-vernon-county--hunting--timber--investments--market-snapshot#article-comments-section Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z NOV 2022 WISCONSIN FORESTLAND SOLD REPORT Vernon County; Hunting, Timber, Investments! Market Snapshot ################## 2022-11-28T16:58:12Z COP27 climate alarmists see oil demand hitting 18-year highs ################## 2022-11-28T16:58:01Z Leeward Renewable Energy closes funding for US solar projects ################## 2022-11-28T16:57:50Z Tata Communications and Intertec Systems expand partnership, set up Cyber Security Operations Centre in UAE ################## 2022-11-28T16:57:18Z City of Houston Is Under a Water Boil Advisory, Affecting Millions
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:"Disasters"}} NOT categories:{{taxonomy:aylien AND label:"Philanthropy"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:"Disasters"}} NOT ' 'categories:{{taxonomy:aylien AND label:"Philanthropy"}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Hawaii's Mauna Loa starts to erupt, sending ash nearby https://www.chronicle-tribune.com/news/wire/hawaii-s-mauna-loa-starts-to-erupt-sending-ash-nearby/article_dd2bdac5-bc7e-56e0-9f6e-3407d90865e4.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z Hawaii's Mauna Loa starts to erupt, sending ash nearby ################## 2022-11-28T16:58:41Z Landslide kills at least 14 attending funeral in Cameroon capital | CNN ################## 2022-11-28T16:57:11Z Hawaii's Mauna Loa volcano starts to erupt, sending ash nearby ################## 2022-11-28T16:56:07Z Hawaii's Mauna Loa, the world's largest active volcano, erupted for the first time in nearly 40 years ################## 2022-11-28T16:55:56Z Mauna Loa is erupting for the first time since 1984, prompting an ashfall advisory for Hawaii's Big Island
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:("Disasters" "Fraud")}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:("Disasters" "Fraud")}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Hawaii's Mauna Loa starts to erupt, sending ash nearby https://www.chronicle-tribune.com/news/wire/hawaii-s-mauna-loa-starts-to-erupt-sending-ash-nearby/article_dd2bdac5-bc7e-56e0-9f6e-3407d90865e4.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-28T16:58:47Z Hawaii's Mauna Loa starts to erupt, sending ash nearby ################## 2022-11-28T16:58:41Z Landslide kills at least 14 attending funeral in Cameroon capital | CNN ################## 2022-11-28T16:58:10Z Cuomo-era New York corruption cases go before U.S. Supreme Court ################## 2022-11-28T16:57:11Z Hawaii's Mauna Loa volcano starts to erupt, sending ash nearby ################## 2022-11-28T16:56:46Z Irishman who stole €185,000 in social welfare payments says 'it was a victimless crime' More for you React Comments | 12
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'categories:{{taxonomy:aylien AND label:(Disasters) AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'categories:{{taxonomy:aylien AND label:(Disasters) AND score:[0.7 TO ' '*] sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} Some without water as sinkhole opens ground under GA truck | Columbus Ledger-Enquirer https://www.ledger-enquirer.com/news/state/georgia/article268916217.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T18:08:42Z Some without water as sinkhole opens ground under GA truck | Columbus Ledger-Enquirer ################## 2022-11-18T17:07:27Z Wildfires often lead to dust storms – and they’re getting bigger ################## 2022-11-18T17:50:20Z Earthquake of magnitude 6.9 shakes Indonesia ################## 2022-11-18T17:30:05Z Strong earthquake shakes western Indonesia; no tsunami alert ################## 2022-11-18T17:28:58Z When Is Hurricane Season In Florida And How To Prepare For It
A robust collection of multilevel tags that represent the industry a news article is covering.
Users can seach for Industry verticals using similar syntax as AYLIEN Categories.
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'industries: {{"Coal Mining" "Agriculture and Fishing" AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'industries: {{"Coal Mining" "Agriculture and Fishing" AND score:[0.7 ' 'TO *] sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators https://agrospectrumindia.com/2022/11/18/13th-agrovision-to-promote-agritech-startups-grassroot-innovators-though-special-pavilion.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T17:55:02Z 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators ################## 2022-11-18T17:56:29Z Kirin Holdings - Chateau Mercian Mariko Winery Chosen Yet Again By 'World's Best Vineyards 2022' ################## 2022-11-18T17:05:31Z Report suggests big changes for ag in Upper Rio Grande River basin ################## 2022-11-18T18:20:16Z Worldwide Microgreens Industry to 2027 - by Type, Farming Technique, Growth Medium, Distribution Channel, End-use, Company and Region ################## 2022-11-18T18:22:28Z Markham Vineyards Reopens Historic Tasting Room After Extensive Renovations
params = {
'published_at.start': 'NOW-10DAYS'
, 'published_at.end': 'NOW'
, 'language[]' : ['en']
, 'aql': 'industries: {{in.mat.coalmine in.agfish AND score:[0.7 TO *] sort_by(score)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##################')
print(story['published_at'])
print(story['title'])
print()
{'aql': 'industries: {{in.mat.coalmine in.agfish AND score:[0.7 TO *] ' 'sort_by(score)}}', 'language[]': ['en'], 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS'} 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators https://agrospectrumindia.com/2022/11/18/13th-agrovision-to-promote-agritech-startups-grassroot-innovators-though-special-pavilion.html Fetched 5 stories. Total story count so far: 5 ################## 2022-11-18T17:55:02Z 13th Agrovision sets up special pavilion for Agritech Startups & Grassroot Innovators ################## 2022-11-18T17:31:11Z Developments In the World of Fishing Sonar ################## 2022-11-18T17:56:29Z Kirin Holdings - Chateau Mercian Mariko Winery Chosen Yet Again By 'World's Best Vineyards 2022' ################## 2022-11-18T18:05:27Z After 7,000 years, Turkish wines are hitting the big time ################## 2022-11-18T18:05:14Z Soft lending for Russian agriculture to grow nearly twofold in 2022 to 177 bln rubles - AgMin
Similarly, we may be interested in searching for certain recurring subjects appearing in the news for example, banks, companies, dogs or even aliens! We could do this using keyword search but AYLIEN provides a solution to this problem by classifying some words as "enties".
What is an entity? The Oxford English Dictionary provides a basic starting point of what an entity is, with its definition being "a thing with distinct and independent existence". Learn more about searching for entities here.
We can use entity types to search for groups of entities without the need for defining an exhaustive list of DBPedia links.
Returning to our query that pulled stories classifed as "banking", let's pull all articles categorised as banking that also feature a "Company" or "Bank" entity type in the title:
N.B. AYLIEN's knowlede base switched from using DBPedia (V2 entities) to Wikidata (V3 entities) in February 2021. If you recquire syntax relating to V2, please contact sales@aylien.com.
# define the query parameters
params = {
'language[]': ['en'],
'published_at.start':'NOW-2DAYS',
'published_at.end':'NOW',
'categories.taxonomy': 'iptc-subjectcode',
'categories.id[]': ['04006002'],
'entities.title.type[]': ["Company", "Bank"],
'cursor': '*',
'per_page' : 10
}
print(params)
stories = get_top_ranked_stories(params, 10)
print('************')
print("Fetched %s stories" %(len(stories)))
{'language[]': ['en'], 'published_at.start': 'NOW-2DAYS', 'published_at.end': 'NOW', 'categories.taxonomy': 'iptc-subjectcode', 'categories.id[]': ['04006002'], 'entities.title.type[]': ['Company', 'Bank'], 'cursor': '*', 'per_page': 10} {'categories.id[]': ['04006002'], 'categories.taxonomy': 'iptc-subjectcode', 'cursor': '*', 'entities.title.type[]': ['Company', 'Bank'], 'language[]': ['en'], 'per_page': 10, 'published_at.end': 'NOW', 'published_at.start': 'NOW-2DAYS'} Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago https://journaltimes.com/lifestyles/health-med-fit/wall-street-slips-as-lockdown-protests-spread-in-china/article_81621f36-0538-520e-9234-9e04289ca9ab.html Fetched 10 stories. Total story count so far: 10 ************ Fetched 10 stories
Let's look closely at the first story in this output and review the entities in the title.
Note, some entities will be linked to a Wikiedata URLs. AYLIEN uses Wikidata to train a vast knowledge base in order to identify entities.
Other entities may not be linked to a DBPedia URL. AYLEIN also utilises a Named Entity Recognisition Model to identify entities in cases where they can't be identified from the knowledge base.
for story in stories[0:1]:
print(story['title'])
print('##############################################')
for entity in stories[0]['entities'][0:5]:
pprint(entity)
print()
Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago ############################################## {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 85, 'start': 72}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 400, 'start': 387}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 856, 'start': 843}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 1212, 'start': 1199}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4601, 'start': 4588}, 'sentiment': {'confidence': 0.77, 'polarity': 'positive'}}], 'text': 'KEB Hana Bank'}]}, 'external_ids': {}, 'id': 'Q484047', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q484047', 'wikipedia': 'https://en.wikipedia.org/wiki/Hana_Bank'}, 'overall_frequency': 5, 'overall_prominence': 0.97, 'overall_sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 107, 'start': 102}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 422, 'start': 417}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 878, 'start': 873}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1234, 'start': 1229}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4623, 'start': 4618}, 'sentiment': {'confidence': 0.69, 'polarity': 'positive'}}], 'text': 'Seoul'}]}, 'external_ids': {}, 'id': 'Q8684', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q8684', 'wikipedia': 'https://en.wikipedia.org/wiki/Seoul'}, 'overall_frequency': 5, 'overall_prominence': 0.93, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['City', 'Location', 'Organization', 'Community']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 120, 'start': 109}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 435, 'start': 424}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 891, 'start': 880}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1247, 'start': 1236}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4636, 'start': 4625}, 'sentiment': {'confidence': 0.68, 'polarity': 'positive'}}], 'text': 'South Korea'}, {'frequency': 1, 'mentions': [{'index': {'end': 762, 'start': 750}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}], 'text': 'South Korean'}]}, 'external_ids': {}, 'id': 'Q884', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q884', 'wikipedia': 'https://en.wikipedia.org/wiki/South_Korea'}, 'overall_frequency': 6, 'overall_prominence': 0.92, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'surface_forms': []}, 'external_ids': {}, 'id': 'N279424833613967807707022612475825359786', 'overall_frequency': 1, 'overall_prominence': 0.91, 'overall_sentiment': {'confidence': 0.55, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.55, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 79, 'start': 63}, 'sentiment': {'confidence': 0.55, 'polarity': 'neutral'}}], 'text': 'DAMIAN J. TROISE'}]}, 'types': ['Human']}
Depending on your query, we should see that the classifier picked up some entities. We can also see some of the entities are linked to Wikidata URLs — we will return to this below.
We are not limited to working with entities in the title however. We can also search for entities in the body of the article. Let's print out the first 10 entities in the body. We can see that AYLIEN's enrichment process identifies a whole range of entity types.
for story in stories[0:1]:
print(story['title'])
print('##############################################')
for entity in stories[0]['entities'][0:3]:
for surface_form in entity['body']['surface_forms']:
pprint(entity)
print()
Wall Street slips as lockdown protests spread in China By DAMIAN J. TROISE - AP Business Writer Nov 28, 2022 Nov 28, 2022 Updated 4 min ago ############################################## {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 4, 'mentions': [{'index': {'end': 278, 'start': 273}, 'sentiment': {'confidence': 0.63, 'polarity': 'negative'}}, {'index': {'end': 1534, 'start': 1529}, 'sentiment': {'confidence': 0.76, 'polarity': 'negative'}}, {'index': {'end': 1878, 'start': 1873}, 'sentiment': {'confidence': 0.56, 'polarity': 'negative'}}, {'index': {'end': 2475, 'start': 2470}, 'sentiment': {'confidence': 0.61, 'polarity': 'negative'}}], 'text': 'China'}, {'frequency': 1, 'mentions': [{'index': {'end': 2542, 'start': 2535}, 'sentiment': {'confidence': 0.54, 'polarity': 'positive'}}], 'text': 'Chinese'}]}, 'external_ids': {}, 'id': 'Q148', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q148', 'wikipedia': 'https://en.wikipedia.org/wiki/China'}, 'overall_frequency': 6, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.64, 'polarity': 'negative'}, 'stock_tickers': [], 'title': {'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 49}, 'sentiment': {'confidence': 0.6, 'polarity': 'neutral'}}], 'text': 'China'}]}, 'types': ['Sovereign_state', 'Location', 'Community', 'Country', 'State_(polity)', 'Organization']} {'body': {'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 85, 'start': 72}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 400, 'start': 387}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 856, 'start': 843}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 1212, 'start': 1199}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4601, 'start': 4588}, 'sentiment': {'confidence': 0.77, 'polarity': 'positive'}}], 'text': 'KEB Hana Bank'}]}, 'external_ids': {}, 'id': 'Q484047', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q484047', 'wikipedia': 'https://en.wikipedia.org/wiki/Hana_Bank'}, 'overall_frequency': 5, 'overall_prominence': 0.97, 'overall_sentiment': {'confidence': 0.93, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['Business', 'Organization']} {'body': {'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 5, 'mentions': [{'index': {'end': 107, 'start': 102}, 'sentiment': {'confidence': 0.95, 'polarity': 'neutral'}}, {'index': {'end': 422, 'start': 417}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 878, 'start': 873}, 'sentiment': {'confidence': 0.94, 'polarity': 'neutral'}}, {'index': {'end': 1234, 'start': 1229}, 'sentiment': {'confidence': 0.93, 'polarity': 'neutral'}}, {'index': {'end': 4623, 'start': 4618}, 'sentiment': {'confidence': 0.69, 'polarity': 'positive'}}], 'text': 'Seoul'}]}, 'external_ids': {}, 'id': 'Q8684', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q8684', 'wikipedia': 'https://en.wikipedia.org/wiki/Seoul'}, 'overall_frequency': 5, 'overall_prominence': 0.93, 'overall_sentiment': {'confidence': 0.94, 'polarity': 'neutral'}, 'stock_tickers': [], 'title': {'surface_forms': []}, 'types': ['City', 'Location', 'Organization', 'Community']}
We have seen how AYLIEN's NLP enrichment identifies entities and that some entities are tagged with a Wikidata URLs. Entities can be useful when a keyword or search term can refer to multiple entities. For example, let's imagine we are interested in finding news regarding the company, Apple — how do we restrict searches for the company only and ignore searches for the fruit? We could search for the keyword "Apple" and also search for company entity types as described above, but then we would run the risk of returning titles that include companies other than Apple Inc. but that mention the fruit, apple. We can, however, perform a more specific search using Wikidata and Wikipedia URLs.
Wikidata is a semantic web project that extracts structured information created as part of the Wikipedia project where distinct entities are referred to by URIs (like https://en.wikipedia.org/wiki/Apple_Inc. and https://www.wikidata.org/wiki/Q312). Using these URIs, we can perform very specific searches for topics and reduce the ambiguity in our query. Searching by URI will also identify different surface forms that link to Apple e.g. "Apple", "Apple Inc." and the Apple stock ticker, "AAPL".
Below, we'll demonstrate a search for Citigroup using its Wikiedpia URL.
N.B. AYLIEN's knowlede base switched from using DBPedia (V2 entities) to Wikidata (V3 entities) in February 2021. If you recquire syntax relating to V2, please contact sales@aylien.com.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{links.wikipedia:"https://en.wikipedia.org/wiki/Citigroup" }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Citigroup')
print()
{'aql': 'entities: {{links.wikipedia:"https://en.wikipedia.org/wiki/Citigroup" ' '}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} Wells Fargo & Company MN Sells 97,148 Shares of Analog Devices, Inc. (NASDAQ:ADI) https://www.dispatchtribunal.com/2022/11/28/wells-fargo-company-mn-sells-97148-shares-of-analog-devices-inc-nasdaqadi.html Fetched 5 stories. Total story count so far: 5 ############# Wells Fargo & Company MN Sells 97,148 Shares of Analog Devices, Inc. (NASDAQ:ADI) https://www.dispatchtribunal.com/2022/11/28/wells-fargo-company-mn-sells-97148-shares-of-analog-devices-inc-nasdaqadi.html Keyword mention: wn 85.22% of the company’s stock. A number of research analysts have issued reports on ADI shares. Citigroup upped their target price on shares of Analog Devic KPMG bets on Manchester with tech jobs and 'sprint' rooms https://www.accountingtoday.com/articles/kpmg-bets-on-manchester-with-tech-jobs-and-sprint-rooms Keyword mention: he latest international firm to grow beyond London, with banks such as Goldman Sachs Group Inc. and Citigroup Inc. finding it easier to secure lower costs and s Beaird Harris Wealth Management LLC Has $138,000 Holdings in DTE Energy (NYSE:DTE) https://www.com-unik.info/2022/11/28/beaird-harris-wealth-management-llc-has-138000-holdings-in-dte-energy-nysedte.html Keyword mention: erts: Wall Street Analysts Forecast Growth A number of research firms have issued reports on DTE. Citigroup cut their price target on DTE Energy from $146.00 Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Keyword mention: to $50.00 and gave the stock an “underweight” rating in a research note on Wednesday, August 17th. Citigroup boosted their target price on shares of World Wres Gamco Investors INC. ET AL Raises Stock Position in Tredegar Co. (NYSE:TG) https://www.themarketsdaily.com/2022/11/28/gamco-investors-inc-et-al-raises-stock-position-in-tredegar-co-nysetg.html Keyword mention: rials company’s stock valued at $435,000 after buying an additional 1,826 shares during the period. Citigroup Inc. boosted its stake in Tredegar by 9.4% during
We can search for entities using their Wikidata ID as per below.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{links.wikidata:"https://www.wikidata.org/wiki/Q219508" }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Citigroup')
print()
{'aql': 'entities: {{links.wikidata:"https://www.wikidata.org/wiki/Q219508" }}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Fetched 5 stories. Total story count so far: 5 ############# Teton Advisors Inc. Has $1.72 Million Position in World Wrestling Entertainment, Inc. (NYSE:WWE) https://www.dailypolitical.com/2022/11/28/teton-advisors-inc-has-1-72-million-position-in-world-wrestling-entertainment-inc-nysewwe.html Keyword mention: to $50.00 and gave the stock an “underweight” rating in a research note on Wednesday, August 17th. Citigroup boosted their target price on shares of World Wres Gamco Investors INC. ET AL Raises Stock Position in Tredegar Co. (NYSE:TG) https://www.themarketsdaily.com/2022/11/28/gamco-investors-inc-et-al-raises-stock-position-in-tredegar-co-nysetg.html Keyword mention: rials company’s stock valued at $435,000 after buying an additional 1,826 shares during the period. Citigroup Inc. boosted its stake in Tredegar by 9.4% during Coherent slips even as Deutsche Bank upgrades, saying bear case 'not as bad as feared' https://seekingalpha.com/news/3911636-coherent-slips-even-as-deutsche-bank-upgrades-saying-bear-case-not-as-bad-as-feared?utm_source=feed_news_all&utm_medium=referral Keyword mention: Trian Fund Management L.P. Raises Stake in General Electric (NYSE:GE) https://mayfieldrecorder.com/2022/11/28/trian-fund-management-l-p-raises-stake-in-general-electric-nysege.html Keyword mention: to $78.00 and set an “overweight” rating on the stock in a research report on Monday, October 3rd. Citigroup increased their price objective on shares of Gener Pin Oak Investment Advisors Inc. Increases Position in Kimbell Royalty Partners, LP (NYSE:KRP) https://slatersentinel.com/news/2022/11/28/pin-oak-investment-advisors-inc-increases-position-in-kimbell-royalty-partners-lp-nysekrp.html Keyword mention: 's stock. Analyst Upgrades and Downgrades KRP has been the topic of a number of research reports. Citigroup assumed coverage on Kimbell Royalty Partners in a
Sometimes we might want to search for an entity by surface form (i.e. the text metnioned) rather than the wiki ID. This may because we want to limit to a certain surface form (MSFT and not Microsoft) or becuase the entity is not in wikidata and so not in our kenoweldege base. Our Named Entity Recognition model and still recognise entities that are not in wikidata, based on the context of the document. This is useful for searching for lesser known companies, SMEs or start-ups.
In the code below I use the code surface_forms.text - this is a full text search. This means that
In contrast, searching via surface_forms on its own will perform an exact string match search i.e. case sensitive with special characters included.
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
#, 'aql': 'entities: {{surface_forms:"Boeing"}}'
, 'aql': 'entities: {{surface_forms.text:"Boeing"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Boeing')
print()
{'aql': 'entities: {{surface_forms.text:"Boeing"}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} ВОЙНА В Украине https://izvestia.kiev.ua/item/show/148204 Fetched 5 stories. Total story count so far: 5 ############# ВОЙНА В Украине https://izvestia.kiev.ua/item/show/148204 Keyword mention: Boeing has proposed to produce small diameter of land bas Russia Won't Stop Strikes until It Runs Out of Missiles, Ukraine's Zelenskiy Says https://english.aawsat.com/home/article/4013126/russia-won%E2%80%99t-stop-strikes-until-it-runs-out-missiles-ukraine%E2%80%99s-zelenskiy-says Keyword mention: . In the latest example of Western military aid to Kyiv, the Pentagon is considering a proposal by Boeing to supply Ukraine with cheap, small precision bomb محلل سياسى: استمرار الحرب الروسية الأوكرانية يضع مستقبل أوروبا على المحك https://www.youm7.com/story/2022/11/28/%D9%85%D8%AD%D9%84%D9%84-%D8%B3%D9%8A%D8%A7%D8%B3%D9%89-%D8%A7%D8%B3%D8%AA%D9%85%D8%B1%D8%A7%D8%B1-%D8%A7%D9%84%D8%AD%D8%B1%D8%A8-%D8%A7%D9%84%D8%B1%D9%88%D8%B3%D9%8A%D8%A9-%D8%A7%D9%84%D8%A3%D9%88%D9%83%D8%B1%D8%A7%D9%86%D9%8A%D8%A9-%D9%8A%D8%B6%D8%B9-%D9%85%D8%B3%D8%AA%D9%82%D8%A8%D9%84-%D8%A3%D9%88%D8%B1%D9%88%D8%A8%D8%A7-%D8%B9%D9%84%D9%89/5992901 Keyword mention: the mental image of how America can help its allies and NATO countries. The Washington study of the Boeing proposal to provide "Keeff" with accurate bombs is USA harkitsee Boeingin ja Saabin kehittämän täsmäpommin lähettämistä Ukrainaan – GLSDB-pommi mahdollistaisi iskut yli 100 km Venäjän selustaan https://www.talouselama.fi/uutiset/usa-harkitsee-boeingin-ja-saabin-kehittaman-tasmapommin-lahettamista-ukrainaan-glsdb-pommi-mahdollistaisi-iskut-yli-100-km-venajan-selustaan/abaa49d0-d08e-492e-97f4-f0a0af921177 Keyword mention: ng sending the GLSDB (Ground-Lunched Small Diamond Bomb) to Ukraine, which was developed jointly by Boeing and Saab. The news agency reports on the nameless Russia won't stop strikes until it runs out of missiles, Ukraine's Zelenskiy says https://nationalpost.com/pmn/news-pmn/russia-wont-stop-strikes-until-it-runs-out-of-missiles-ukraines-zelenskiy-says Keyword mention: . In the latest example of Western military aid to Kyiv, the Pentagon is considering a proposal by Boeing to supply Ukraine with cheap, small precision bomb
We can search for entities using their stock ticker (where supported).
params = {
'published_at.start': 'NOW-1MONTH'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{stock_ticker:GOOGL }}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Google')
print()
{'aql': 'entities: {{stock_ticker:GOOGL }}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-1MONTH'} JEPI Vs. SPY: The Relative Lead Unlikely To Continue In 2023 https://seekingalpha.com/article/4560842-jepi-vs-spy-the-relative-lead-unlikely-to-continue-in-2023?source=feed_all_articles Fetched 5 stories. Total story count so far: 5 ############# JEPI Vs. SPY: The Relative Lead Unlikely To Continue In 2023 https://seekingalpha.com/article/4560842-jepi-vs-spy-the-relative-lead-unlikely-to-continue-in-2023?source=feed_all_articles Keyword mention: Investors Increasingly Impatient with Slow Pace of Autonomous Vehicles https://programbusiness.com/news/investors-increasingly-impatient-with-slow-pace-of-autonomous-vehicles/ Keyword mention: ut costs during an economic slowdown. An influential hedge fund has also questioned Alphabet Inc.’s Google s years-long effort to advance self-driving techno Yahoo buys nearly 25% stake in advertising tech firm Taboola https://infotechlead.com/digital/yahoo-buys-nearly-25-stake-in-advertising-tech-firm-taboola-75712 Keyword mention: Allianz Asset Management GmbH Acquires 30,898 Shares of Alphabet Inc. (NASDAQ:GOOG) https://mayfieldrecorder.com/2022/11/28/allianz-asset-management-gmbh-acquires-30898-shares-of-alphabet-inc-nasdaqgoog.html Keyword mention: , Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America. It operates through Google Services, Google Cloud, and Other Bets segments. T SPACs Slap Some Lipstick on Their Penny-Stock Pigs https://medworm.com/1053087611/spacs-slap-some-lipstick-on-their-penny-stock-pigs/ Keyword mention:
Sometimes if we are searching for an entity surface form, we may want to specify the entity type to help identify the correct entity. This may be becuase the entity is not recognised in wikidata and therefore not in the AYLIEN knowledge base.
However, our Named Entity Recognistion model can predict what entity type the entity is (i.e. Person, Organization, Location etc.) even if it is not in wikidata. This enables us to search for entity surface forms and explictly state what type of entity they should be.
Below we searcg for the surface form "Apple" and specify that we are looking for an Organization entity type.
params = {
"aql": "entities:{{surface_forms.text:Apple AND type:Organization}}"
, "categories_taxonomy": "iptc-subjectcode"
, "categories_id": ["04000000"]
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print('Keyword mention:')
print_keyword_mention(story, 'body', 'Apple')
print()
{'aql': 'entities:{{surface_forms.text:Apple AND type:Organization}}', 'categories_id': ['04000000'], 'categories_taxonomy': 'iptc-subjectcode', 'language': ['en'], 'per_page': 5} Ahead of Market: 10 things that will decide D-Street action on Tuesday https://economictimes.indiatimes.com/markets/stocks/news/ahead-of-market-10-things-that-will-decide-d-street-action-on-tuesday/articleshow/95835927.cms Fetched 5 stories. Total story count so far: 5 ############# Ahead of Market: 10 things that will decide D-Street action on Tuesday https://economictimes.indiatimes.com/markets/stocks/news/ahead-of-market-10-things-that-will-decide-d-street-action-on-tuesday/articleshow/95835927.cms Keyword mention: r Monday sales were set for a record.The biggest drag on the benchmark S&P 500 index, however, were Apple Inc shares, which fell 1.5% after a report that th WhatsApp Message Yourself feature starts rolling out: Here's how to use it Keyword mention: eature, users must update the WhatsApp app on their smartphone. To do so, head to Google Play Store/Apple App Store and install the latest version of the ap 'A Christmas miracle': Woman kidnapped as child reunites with family 51 years later https://headtopics.com/us/a-christmas-miracle-woman-kidnapped-as-child-reunites-with-family-51-years-later-32220438 Keyword mention: ry isn't too difficult to figure out, the change of pace for a Hallmark movie is welcomed. In 2021, Apple agreed to broadcast A Charlie Brown Christmas on P The Best Cyber Monday deals available now https://headtopics.com/us/the-best-cyber-monday-deals-available-now-32218332 Keyword mention: he 2021 iPad Pro 11-inch with an M1 chip, well, here is the follow-up: you can also get the 2nd gen Apple Pencil that works great with it at a $40 off price Amazon, union organizer head to court over COVID-based class racial-bias lawsuit https://thegrio.com/2022/11/28/amazon-union-organizer-smalls-head-to-court-covid-based-class-racial-bias-lawsuit/ Keyword mention: acility, it would weaken the claims within the racial-bias lawsuit. TheGrio is FREE on your TV via Apple TV, Amazon Fire, Roku and Android TV. Also, please
We can specify where in the article we want to find the entity by specifying the title or body elements.
params = {
"aql": "entities:{{element:title AND surface_forms:Apple}}"
, "categories_taxonomy": "iptc-subjectcode"
, "categories_id": ["04000000"]
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms:Apple}}', 'categories_id': ['04000000'], 'categories_taxonomy': 'iptc-subjectcode', 'language': ['en'], 'per_page': 5} Apple’s Change to AirDrop Is Hurting Chinese Protests https://www.techinvestornews.com/Tech-News/Latest-Headlines/apples-change-to-airdrop-is-hurting-chinese-protests Fetched 5 stories. Total story count so far: 5 ############# Apple’s Change to AirDrop Is Hurting Chinese Protests https://www.techinvestornews.com/Tech-News/Latest-Headlines/apples-change-to-airdrop-is-hurting-chinese-protests The Best Apple Cyber Monday Deals https://headtopics.com/us/the-best-apple-cyber-monday-deals-32218396 Gwyneth Paltrow Reunites With Look-Alike Daughter Apple, 18, In NYC On Teen's College Break: Photos https://www.newsbreak.com/news/2839249825578/gwyneth-paltrow-reunites-with-look-alike-daughter-apple-18-in-nyc-on-teen-s-college-break-photos Why Apple Stock Is Sinking Today https://www.fool.com/investing/2022/11/28/why-apple-stock-is-sinking-today/?source=iedfolrf0000001 Snap up a £30 saving on the Apple Watch ultra this Cyber Monday https://theworldnews.net/gb-news/snap-up-a-ps30-saving-on-the-apple-watch-ultra-this-cyber-monday
We can add logic to search for multiple entities at once. Note in this example we are using the OR operator to search for one of two entities.
params = {
'published_at.start': 'NOW-30DAYS'
, 'published_at.end': 'NOW'
, 'aql': 'entities:{{element:title AND surface_forms: "Deloitte"}} OR entities:{{element:title AND surface_forms: "Accenture"}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms: "Deloitte"}} OR ' 'entities:{{element:title AND surface_forms: "Accenture"}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'} BRITISH COLUMBIA INVESTMENT MANAGEMENT Corp Sells 24,262 Shares of Accenture plc (NYSE:ACN) https://baseballnewssource.com/2022/11/28/british-columbia-investment-management-corp-sells-24262-shares-of-accenture-plc-nyseacn/7861473.html Fetched 5 stories. Total story count so far: 5 ############# BRITISH COLUMBIA INVESTMENT MANAGEMENT Corp Sells 24,262 Shares of Accenture plc (NYSE:ACN) https://baseballnewssource.com/2022/11/28/british-columbia-investment-management-corp-sells-24262-shares-of-accenture-plc-nyseacn/7861473.html Tvh оцифровывает свою глобальную сеть складов с помощью Körber и Accenture — Data Intelligence. https://zephyrnet.com/ru/tvh-%D0%BE%D1%86%D0%B8%D1%84%D1%80%D0%BE%D0%B2%D1%8B%D0%B2%D0%B0%D0%B5%D1%82-%D1%81%D0%B2%D0%BE%D1%8E-%D0%B3%D0%BB%D0%BE%D0%B1%D0%B0%D0%BB%D1%8C%D0%BD%D1%83%D1%8E-%D1%81%D0%B5%D1%82%D1%8C-%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2-%D1%81-%D0%BF%D0%BE%D0%BC%D0%BE%D1%89%D1%8C%D1%8E-korber-%D0%B8-accure-5/ Ensign Peak Advisors Inc Lowers Position in Accenture plc (NYSE:ACN) https://www.americanbankingnews.com/2022/11/28/ensign-peak-advisors-inc-lowers-position-in-accenture-plc-nyseacn.html Purdue, Accenture sign five-year agreement in support of smart manufacturing https://www.purdue.edu/newsroom/releases/2022/Q4/purdue,-accenture-sign-five-year-agreement-in-support-of-smart-manufacturing.html Deloitte mandated for revisited Hassyan IWP
We can also limit to the stories we want by enttiy sentiment, as exemplified below. Here we will search for negative mentions of Citigroup.
params = {
"aql": "entities:{{element:title AND surface_forms:Citigroup AND sentiment:negative}}"
, "publised_at_start": "NOW-10DAYS"
, "period": "+1DAY"
, "language": ["en"]
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
print('#############')
for story in stories:
print(story['title'])
print(story['links']['permalink'])
print()
{'aql': 'entities:{{element:title AND surface_forms:Citigroup AND ' 'sentiment:negative}}', 'language': ['en'], 'per_page': 5, 'period': '+1DAY', 'publised_at_start': 'NOW-10DAYS'} Magna International (NYSE:MGA) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html Fetched 5 stories. Total story count so far: 5 ############# Magna International (NYSE:MGA) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html Magna International (NYSE:MGA) Downgraded by Citigroup https://www.thelincolnianonline.com/2022/11/27/magna-international-nysemga-downgraded-by-citigroup.html MacroGenics (NASDAQ:MGNX) Downgraded by Citigroup https://www.com-unik.info/2022/11/27/macrogenics-nasdaqmgnx-downgraded-by-citigroup.html MacroGenics (NASDAQ:MGNX) Downgraded by Citigroup https://www.thelincolnianonline.com/2022/11/27/macrogenics-nasdaqmgnx-downgraded-by-citigroup.html Magna International (NYSE:MGA) Downgraded by Citigroup https://zolmax.com/investing/magna-international-nysemga-downgraded-by-citigroup/8168431.html
Here we will isolate the Citigroup entity in the first story to show it is classified with negative sentiment.
for entity in stories[0]['entities']:
for surface_form in entity['title']['surface_forms']:
if 'Citigroup' in surface_form['text']:
pprint(entity)
{'body': {'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 9, 'start': 0}, 'sentiment': {'confidence': 0.72, 'polarity': 'neutral'}}], 'text': 'Citigroup'}]}, 'external_ids': {}, 'id': 'Q219508', 'links': {'wikidata': 'https://www.wikidata.org/wiki/Q219508', 'wikipedia': 'https://en.wikipedia.org/wiki/Citigroup'}, 'overall_frequency': 2, 'overall_prominence': 0.98, 'overall_sentiment': {'confidence': 0.72, 'polarity': 'neutral'}, 'stock_tickers': ['C'], 'title': {'sentiment': {'confidence': 0.53, 'polarity': 'negative'}, 'surface_forms': [{'frequency': 1, 'mentions': [{'index': {'end': 54, 'start': 45}, 'sentiment': {'confidence': 0.53, 'polarity': 'negative'}}], 'text': 'Citigroup'}]}, 'types': ['Business', 'Organization', 'Financial_institution']}
Entity prominence is a measure of how significant a mention of an entity is on a scale of 0-1.
Intuitively - as consumers of news - we know if an entity appears in the title, in the first paragaph or many times in an article, then it is pretty significant. AYLIEN's entioty prominence metric catpures this signficance.
We can use this as a query paramter to filter out insignificant mentions of an entity by setting an entity prominence threshold. We can also sort by entity prominence to see the most significant mentions first. For more ways to sort your query output see here.
params = {
'published_at.start': 'NOW-30DAYS'
, 'published_at.end': 'NOW'
, 'aql': 'entities: {{surface_forms: "Citigroup" AND overall_prominence:[0.6 TO *] sort_by(overall_prominence)}}'
, 'per_page' : 5
}
stories = get_top_ranked_stories(params, 5)
for story in stories:
print('##############')
print('Title:')
print_keyword_mention(story, 'title', 'Citigroup')
print()
print('Mention:')
print_keyword_mention(story, 'body', 'Citigroup')
{'aql': 'entities: {{surface_forms: "Citigroup" AND overall_prominence:[0.6 TO ' '*] sort_by(overall_prominence)}}', 'per_page': 5, 'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS'} Vestmark Advisory Solutions Inc. Purchases 10,915 Shares of Citigroup Inc. (NYSE:C) https://www.themarketsdaily.com/2022/10/29/vestmark-advisory-solutions-inc-purchases-10915-shares-of-citigroup-inc-nysec.html Fetched 5 stories. Total story count so far: 5 ############## Title: 10,915 Shares of Citigroup Inc. (NYSE:C) Mention: Citigroup Inc. (NYSE:C – Get Rating) by 16.5% during the sec ############## Title: Citigroup China Technology Forum 2022 Mention: a new form of trade, but the brutal development of the industry In this context, on October 28, the Citigroup based Changed Opportunities & Technology Forum | 2 ############## Title: Citigroup Lowers Visa (NYSE:V) Price Target to $238.00 Mention: Citigroup from $254.00 to $238.00 in a research note publish ############## Title: OSCO SHIPPING (OTCMKTS:CICOF) Cut to Sell at Citigroup Mention: Citigroup downgraded shares of COSCO SHIPPING ( OTCMKTS:CICO ############## Title: Citigroup Lowers YETI (NYSE:YETI) Price Target to $43.00 Mention: Citigroup from $57.00 to $43.00 in a report issued on Friday
So far we have pulled stories in English only. However, our News API supports 6 native languages and 10 translated languages:
Native Languages:
Translated Languages:
Let's perform a search in some native languages other than English. Here we'll search for stories featuring Citigroup in the title and print the native language title and an English title.
# define the query parameters
params = {
'language': ['de', 'fr', 'it', 'es', 'pt'],
'title': 'Citigroup',
'published_at.start':'NOW-10DAYS',
'published_at.end':'NOW',
'cursor': '*',
'per_page' : 50
}
print(params)
stories = get_top_ranked_stories(params, 100)
print('************')
print("Fetched %s stories" %(len(stories)))
for story in stories:
print(story['title'])
print(story['translations']['en']['title'])
print('')
{'language': ['de', 'fr', 'it', 'es', 'pt'], 'title': 'Citigroup', 'published_at.start': 'NOW-10DAYS', 'published_at.end': 'NOW', 'cursor': '*', 'per_page': 50} {'cursor': '*', 'language': ['de', 'fr', 'it', 'es', 'pt'], 'per_page': 50, 'published_at.end': 'NOW', 'published_at.start': 'NOW-10DAYS', 'title': 'Citigroup'} ¿Carlos Slim e Inbursa abandonan la carrera: ya no comprarán Banamex a Citigroup? https://www.capitalmexico.com.mx/mundo/carlos-slim-e-inbursa-abandonan-la-carrera-ya-no-compraran-banamex-a-citigroup/ Fetched 28 stories. Total story count so far: 28 Fetched 0 stories. Total story count so far: 28 ************ Fetched 28 stories ¿Carlos Slim e Inbursa abandonan la carrera: ya no comprarán Banamex a Citigroup? Are Slim and Inbursa leaving the race: will Banamex no longer buy Citigroup? Citigroup Citigroup Citigroup Citigroup Tesla, Inc. : Citigroup cambia a neutral | MarketScreener Tesla, Inc. : Citigroup changes to neutral | MarketScreener El multimillonario mexicano Carlos Slim descarta comprar Banamex a Citigroup Mexican millionaire Carlos Slim dismisses Banamex to Citigroup México: Banco de Slim se retira de compra de Banamex El grupo financiero Inbursa, del millonario mexicano Carlos Slim, anuncia su retiro del proceso de compra de Banamex, uno de los principales bancos de México que la corporación estadounidense Citigroup espera vender en los próximos meses Associated Press Nov 23, 2022 30 min ago Mexico: Slim Bank withdraws from Banamex The financial group Inbursa, of Mexican millionaire Carlos Slim, announces its withdrawal from the Banamex purchase process, one of Mexico's major banks that the American firm Citigroup expects to sell in the coming months sociated Press Nov 23, 2022 30 min ago México: Banco de Slim se retira de compra de Banamex El grupo financiero Inbursa, del millonario mexicano Carlos Slim, anuncia su retiro del proceso de compra de Banamex, uno de los principales bancos de México que la corporación estadounidense Citigroup espera vender en los próximos meses Associated Press Nov 23, 2022 14 min ago Mexico: Slim Bank withdraws from Banamex The financial group Inbursa, of Mexican millionaire Carlos Slim, announces its withdrawal from the Banamex purchase process, one of Mexico's major banks that the American firm Citigroup expects to sell in the coming months sociated Press Nov 23, 2022 14 min ago Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan DICK'S Sporting Goods, Inc. : El Citigroup continua con su recomendación de compra | MarketScreener DICK'S Sporting Goods, Inc. : The Citigroup continues with its purchase recommendation | MarketScreener El multimillonario mexicano Carlos Slim descarta comprar Banamex a Citigroup Mexican millionaire Carlos Slim dismisses Banamex to Citigroup Reguladores de EE.UU. pidieron a Citigroup mejorar su plan de simulación de quiebras US regulators They asked Citigroup to improve its bankruptcy simulation plan Reguladores instan a Citigroup a corregir plan de quiebra Regulators urge Citigroup to correct bankruptcy plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Reguladores de EEUU instan a Citigroup a corregir plan de simulación de quiebra US regulators urge Citigroup to correct bankruptcy simulation plan Dell Technologies Inc. : Citigroup reitera su recomendación de compra | MarketScreener Dell Technologies Inc. : Citigroup reiterates its purchase recommendation | MarketScreener BP plc : Citigroup Cambia su recomendación a compra | MarketScreener BP Plc : Citigroup Change your purchase recommendation | MarketScreener Unity Software Inc. : Obtiene una recomendación de compra de Citigroup | MarketScreener Unit Software Inc. : Get a Citigroup Buy recommendation | MarketScreener Rackspace Technology, Inc. : El Citigroup se mantiene neutral | MarketScreener Rackspace Technology, Inc. : Citigroup remains neutral | MarketScreener Eneti Inc. : Obtiene una recomendación de compra de Citigroup | MarketScreener Eneti Inc. : Get a Citigroup Buy recommendation | MarketScreener Macy's, Inc. : El Citigroup se mantiene neutral | MarketScreener Macy's, Inc. : Citigroup remains neutral | MarketScreener Bath & Body Works, Inc. : recomendación de compra de Citigroup | MarketScreener Bath & Body Works, Inc. : Citigroup's Buy Recommendation | MarketScreener IBEX Limited : Citigroup permanece neutral | MarketScreener IBEX Limited : Citigroup remains neutral | MarketScreener Roblox Corporation : El Citigroup continua con un recomendación de compra | MarketScreener Robles Corporation : Citigroup continues with a shopping recommendation | MarketScreener NetEase, Inc. : Citigroup mantiene su recomendación de compra | MarketScreener NetEase, Inc. : Citigroup maintains its purchase recommendation | MarketScreener Autodesk, Inc. : Citigroup mantiene su recomendación de compra | MarketScreener Autodesk, Inc. : Citigroup maintains its purchase recommendation | MarketScreener HP Inc. : Citigroup se mantiene neutral. | MarketScreener HP Inc. : Citigroup remains neutral. | MarketScreener
Up to now we have interrogated our News API output by converting the JSON objects to Python dictionaries, iterating through them and printing the elements. Sometimes we may wish to view the data in a more tabular format. Below, we will loop through our non-English content stories and create a Pandas dataframe. This will also be useful later when we want to visualize our data.
We'll also pull out some contextual information about each story such as the article's permalink and the stories' sentiment score. AYLIEN's enrichment process predicts the overall sentiment in the body and title of a document as positive, negative and neutral and also outputs a confidence score.
# create dataframe in the format we want
my_columns = ['id', 'title', 'title_eng', 'permalink', 'published_at', 'source', 'body_polarity', 'body_polarity_score']
my_data_frame = []
for story in stories:
# make array of the fields we're interested in
data = [
story['id']
, story['title']
, story['translations']['en']['title']
, story['links']['permalink']
, story['published_at']
, story['source']['domain']
, story['sentiment']['body']['polarity']
, story['sentiment']['body']['score']
]
zipped = zip(my_columns, data)
a_dictionary = dict(zipped)
my_data_frame.append(a_dictionary)
my_data_frame = pd.DataFrame(my_data_frame, columns = my_columns)
my_data_frame.head(5)
id | title | title_eng | permalink | published_at | source | body_polarity | body_polarity_score | |
---|---|---|---|---|---|---|---|---|
0 | 5074728360 | ¿Carlos Slim e Inbursa abandonan la carrera: y... | Are Slim and Inbursa leaving the race: will Ba... | https://www.capitalmexico.com.mx/mundo/carlos-... | 2022-11-24T17:49:13Z | capitalmexico.com.mx | negative | 0.67 |
1 | 5074491026 | Citigroup | Citigroup | https://lado.mx/trending.php?id=5756 | 2022-11-24T14:33:36Z | lado.mx | positive | 0.51 |
2 | 5074310047 | Citigroup | Citigroup | https://www.lado.mx/trending.php?id=5756 | 2022-11-24T12:28:53Z | lado.mx | positive | 0.70 |
3 | 5074108547 | Tesla, Inc. : Citigroup cambia a neutral | Mar... | Tesla, Inc. : Citigroup changes to neutral | M... | https://es.marketscreener.com/cotizacion/accio... | 2022-11-24T09:48:26Z | marketscreener.com | neutral | 0.60 |
4 | 5073894044 | El multimillonario mexicano Carlos Slim descar... | Mexican millionaire Carlos Slim dismisses Bana... | https://palabrasclaras.mx/economia/el-multimil... | 2022-11-24T06:07:00Z | palabrasclaras.mx | negative | 0.58 |
We have seen how we can pull granular stories using the Stories endpoint. However, if we want to investigate volumes of stories over time, we can use the Timeseries endpoint. This endpoint retrieves the stories that meet our criteria and aggregates per minute, hour, day, month, or however we see fit. This can be very usfeul for identifying spikes or dips in news volume relating to a subject of interest. By default, our query below will aggregate the volume of stories per day.
The timeseries endpoint ouputs data in a json format, but out function above will convert this to a pandas dataframe for legibility.
# define the query parameters
params = {
'title': 'Citigroup',
'published_at.start':'NOW-30DAYS',
'published_at.end':'NOW',
}
timeseries_data = get_timeseries(params)
timeseries_data
{'published_at.end': 'NOW', 'published_at.start': 'NOW-30DAYS', 'title': 'Citigroup'} Number of stories returned : 4,765
count | published_at | |
---|---|---|
0 | 114 | 2022-10-29 |
1 | 127 | 2022-10-30 |
2 | 179 | 2022-10-31 |
3 | 185 | 2022-11-01 |
4 | 182 | 2022-11-02 |
5 | 210 | 2022-11-03 |
6 | 216 | 2022-11-04 |
7 | 97 | 2022-11-05 |
8 | 169 | 2022-11-06 |
9 | 152 | 2022-11-07 |
10 | 214 | 2022-11-08 |
11 | 160 | 2022-11-09 |
12 | 145 | 2022-11-10 |
13 | 168 | 2022-11-11 |
14 | 119 | 2022-11-12 |
15 | 138 | 2022-11-13 |
16 | 109 | 2022-11-14 |
17 | 204 | 2022-11-15 |
18 | 213 | 2022-11-16 |
19 | 243 | 2022-11-17 |
20 | 285 | 2022-11-18 |
21 | 121 | 2022-11-19 |
22 | 138 | 2022-11-20 |
23 | 182 | 2022-11-21 |
24 | 166 | 2022-11-22 |
25 | 210 | 2022-11-23 |
26 | 121 | 2022-11-24 |
27 | 86 | 2022-11-25 |
28 | 70 | 2022-11-26 |
29 | 42 | 2022-11-27 |
We can makes sense of timeseries data much quicker if we visualize it. Below, we make use out of Plotly library to visualize the data.
fig = go.Figure( data = go.Scatter(
x = timeseries_data['published_at']
, y=timeseries_data['count']
, line=dict(color='blue')
))
# forrmat the chart
fig.update_layout(
title='Volume of Stories Over Time',
plot_bgcolor='white',
xaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
, yaxis=dict(
gridcolor='rgb(204, 204, 204)',
linecolor='rgb(204, 204, 204)'
)
)
fig.show()