I'm using Tweepy to get tweets and store all the tweets into database. but the problem I'm facing right now is Tweepy also store duplicate tweets into database.
here's the code I use below:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from flask_sqlalchemy import SQLAlchemy
from models import TrainingTweets, db
import mysql.connector
import json
import tweepy
from tweepy.api import API
#consumer key, consumer secret, access token, access secret.
ckey=""
csecret=""
atoken="-"
asecret=""
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
api = tweepy.API(auth)
class listener(StreamListener):
def __init__(self, api=None):
self.api = api or API()
self.n = 0
self.m = 50
def on_data(self, data):
all_data = json.loads(data)
self.n = self.n+1
if self.n <= self.m:
tweet = all_data["text"]
username = all_data["user"]["screen_name"]
label = "1"
ttweets = TrainingTweets(label_id=label, tweet_username=username, tweet=tweet)
db.session.add(ttweets)
db.session.commit()
print((username, tweet))
return True
else:
print("Successfully stored ", self.m, " tweets into database")
return False
def on_error(self, status):
print(status)
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["health"], languages=["en"], follow="")
can anyone help me to make Tweepy only store one tweet instead of all duplicate tweets into database?
Aucun commentaire:
Enregistrer un commentaire