In [1]:
#添加必要的库
import pandas as pd
import numpy as np
import requests as r
import os
import tweepy as tp
import json
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

收集

In [2]:
#读取相关文件
dfTwitter_Archive = pd.read_csv('twitter-archive-enhanced.csv')
In [3]:
#从网络上下载文件
file_path = r'https://raw.githubusercontent.com/udacity/new-dand-advanced-china/master/%E6%95%B0%E6%8D%AE%E6%B8%85%E6%B4%97/WeRateDogs%E9%A1%B9%E7%9B%AE/image-predictions.tsv'
response = r.get(file_path)
with open(file_path.split('/')[-1],mode='wb') as file:
    file.write(response.content)
dfImage_Predictions = pd.read_csv('image-predictions.tsv',sep='\t')
In [4]:
#从推特下载其他数据 - 因无法访问推特所以直接下载数据

#consumer_key = 'YOUR CONSUMER KEY'
#consumer_secret = 'YOUR CONSUMER SECRET'
#access_token = 'YOUR ACCESS TOKEN'
#access_secret = 'YOUR ACCESS SECRET'

#auth = tp.OAuthHandler(consumer_key, consumer_secret)
#auth.set_access_token(access_token, access_secret)

#api = tp.API(auth)

#打印其他用户主页上的时间轴里的内容
#public_tweets = api.user_timeline('WeRateDogs')
 
#for tweet in public_tweets:
#    print(tweet.text)

#直接读取json
json_file_path = r'tweet_json.txt'
dfOrigin = pd.read_json(json_file_path,lines=True)
dfJson_Tweeter = dfOrigin[['id','retweet_count','favorite_count']]
#dfOrigin[dfOrigin['id']!=dfOrigin['id_str']].shape[0]
dfJson_Tweeter = dfJson_Tweeter.rename(columns={'id':'tweet_id'})

评估

In [5]:
dfTwitter_Archive
Out[5]:
tweet_id in_reply_to_status_id in_reply_to_user_id timestamp source text retweeted_status_id retweeted_status_user_id retweeted_status_timestamp expanded_urls rating_numerator rating_denominator name doggo floofer pupper puppo
0 892420643555336193 NaN NaN 2017-08-01 16:23:56 +0000 <a href="http://twitter.com/download/iphone" r... This is Phineas. He's a mystical boy. Only eve... NaN NaN NaN https://twitter.com/dog_rates/status/892420643... 13 10 Phineas None None None None
1 892177421306343426 NaN NaN 2017-08-01 00:17:27 +0000 <a href="http://twitter.com/download/iphone" r... This is Tilly. She's just checking pup on you.... NaN NaN NaN https://twitter.com/dog_rates/status/892177421... 13 10 Tilly None None None None
2 891815181378084864 NaN NaN 2017-07-31 00:18:03 +0000 <a href="http://twitter.com/download/iphone" r... This is Archie. He is a rare Norwegian Pouncin... NaN NaN NaN https://twitter.com/dog_rates/status/891815181... 12 10 Archie None None None None
3 891689557279858688 NaN NaN 2017-07-30 15:58:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Darla. She commenced a snooze mid meal... NaN NaN NaN https://twitter.com/dog_rates/status/891689557... 13 10 Darla None None None None
4 891327558926688256 NaN NaN 2017-07-29 16:00:24 +0000 <a href="http://twitter.com/download/iphone" r... This is Franklin. He would like you to stop ca... NaN NaN NaN https://twitter.com/dog_rates/status/891327558... 12 10 Franklin None None None None
5 891087950875897856 NaN NaN 2017-07-29 00:08:17 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a majestic great white breaching ... NaN NaN NaN https://twitter.com/dog_rates/status/891087950... 13 10 None None None None None
6 890971913173991426 NaN NaN 2017-07-28 16:27:12 +0000 <a href="http://twitter.com/download/iphone" r... Meet Jax. He enjoys ice cream so much he gets ... NaN NaN NaN https://gofundme.com/ydvmve-surgery-for-jax,ht... 13 10 Jax None None None None
7 890729181411237888 NaN NaN 2017-07-28 00:22:40 +0000 <a href="http://twitter.com/download/iphone" r... When you watch your owner call another dog a g... NaN NaN NaN https://twitter.com/dog_rates/status/890729181... 13 10 None None None None None
8 890609185150312448 NaN NaN 2017-07-27 16:25:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Zoey. She doesn't want to be one of th... NaN NaN NaN https://twitter.com/dog_rates/status/890609185... 13 10 Zoey None None None None
9 890240255349198849 NaN NaN 2017-07-26 15:59:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Cassie. She is a college pup. Studying... NaN NaN NaN https://twitter.com/dog_rates/status/890240255... 14 10 Cassie doggo None None None
10 890006608113172480 NaN NaN 2017-07-26 00:31:25 +0000 <a href="http://twitter.com/download/iphone" r... This is Koda. He is a South Australian decksha... NaN NaN NaN https://twitter.com/dog_rates/status/890006608... 13 10 Koda None None None None
11 889880896479866881 NaN NaN 2017-07-25 16:11:53 +0000 <a href="http://twitter.com/download/iphone" r... This is Bruno. He is a service shark. Only get... NaN NaN NaN https://twitter.com/dog_rates/status/889880896... 13 10 Bruno None None None None
12 889665388333682689 NaN NaN 2017-07-25 01:55:32 +0000 <a href="http://twitter.com/download/iphone" r... Here's a puppo that seems to be on the fence a... NaN NaN NaN https://twitter.com/dog_rates/status/889665388... 13 10 None None None None puppo
13 889638837579907072 NaN NaN 2017-07-25 00:10:02 +0000 <a href="http://twitter.com/download/iphone" r... This is Ted. He does his best. Sometimes that'... NaN NaN NaN https://twitter.com/dog_rates/status/889638837... 12 10 Ted None None None None
14 889531135344209921 NaN NaN 2017-07-24 17:02:04 +0000 <a href="http://twitter.com/download/iphone" r... This is Stuart. He's sporting his favorite fan... NaN NaN NaN https://twitter.com/dog_rates/status/889531135... 13 10 Stuart None None None puppo
15 889278841981685760 NaN NaN 2017-07-24 00:19:32 +0000 <a href="http://twitter.com/download/iphone" r... This is Oliver. You're witnessing one of his m... NaN NaN NaN https://twitter.com/dog_rates/status/889278841... 13 10 Oliver None None None None
16 888917238123831296 NaN NaN 2017-07-23 00:22:39 +0000 <a href="http://twitter.com/download/iphone" r... This is Jim. He found a fren. Taught him how t... NaN NaN NaN https://twitter.com/dog_rates/status/888917238... 12 10 Jim None None None None
17 888804989199671297 NaN NaN 2017-07-22 16:56:37 +0000 <a href="http://twitter.com/download/iphone" r... This is Zeke. He has a new stick. Very proud o... NaN NaN NaN https://twitter.com/dog_rates/status/888804989... 13 10 Zeke None None None None
18 888554962724278272 NaN NaN 2017-07-22 00:23:06 +0000 <a href="http://twitter.com/download/iphone" r... This is Ralphus. He's powering up. Attempting ... NaN NaN NaN https://twitter.com/dog_rates/status/888554962... 13 10 Ralphus None None None None
19 888202515573088257 NaN NaN 2017-07-21 01:02:36 +0000 <a href="http://twitter.com/download/iphone" r... RT @dog_rates: This is Canela. She attempted s... 8.874740e+17 4.196984e+09 2017-07-19 00:47:34 +0000 https://twitter.com/dog_rates/status/887473957... 13 10 Canela None None None None
20 888078434458587136 NaN NaN 2017-07-20 16:49:33 +0000 <a href="http://twitter.com/download/iphone" r... This is Gerald. He was just told he didn't get... NaN NaN NaN https://twitter.com/dog_rates/status/888078434... 12 10 Gerald None None None None
21 887705289381826560 NaN NaN 2017-07-19 16:06:48 +0000 <a href="http://twitter.com/download/iphone" r... This is Jeffrey. He has a monopoly on the pool... NaN NaN NaN https://twitter.com/dog_rates/status/887705289... 13 10 Jeffrey None None None None
22 887517139158093824 NaN NaN 2017-07-19 03:39:09 +0000 <a href="http://twitter.com/download/iphone" r... I've yet to rate a Venezuelan Hover Wiener. Th... NaN NaN NaN https://twitter.com/dog_rates/status/887517139... 14 10 such None None None None
23 887473957103951883 NaN NaN 2017-07-19 00:47:34 +0000 <a href="http://twitter.com/download/iphone" r... This is Canela. She attempted some fancy porch... NaN NaN NaN https://twitter.com/dog_rates/status/887473957... 13 10 Canela None None None None
24 887343217045368832 NaN NaN 2017-07-18 16:08:03 +0000 <a href="http://twitter.com/download/iphone" r... You may not have known you needed to see this ... NaN NaN NaN https://twitter.com/dog_rates/status/887343217... 13 10 None None None None None
25 887101392804085760 NaN NaN 2017-07-18 00:07:08 +0000 <a href="http://twitter.com/download/iphone" r... This... is a Jubilant Antarctic House Bear. We... NaN NaN NaN https://twitter.com/dog_rates/status/887101392... 12 10 None None None None None
26 886983233522544640 NaN NaN 2017-07-17 16:17:36 +0000 <a href="http://twitter.com/download/iphone" r... This is Maya. She's very shy. Rarely leaves he... NaN NaN NaN https://twitter.com/dog_rates/status/886983233... 13 10 Maya None None None None
27 886736880519319552 NaN NaN 2017-07-16 23:58:41 +0000 <a href="http://twitter.com/download/iphone" r... This is Mingus. He's a wonderful father to his... NaN NaN NaN https://www.gofundme.com/mingusneedsus,https:/... 13 10 Mingus None None None None
28 886680336477933568 NaN NaN 2017-07-16 20:14:00 +0000 <a href="http://twitter.com/download/iphone" r... This is Derek. He's late for a dog meeting. 13... NaN NaN NaN https://twitter.com/dog_rates/status/886680336... 13 10 Derek None None None None
29 886366144734445568 NaN NaN 2017-07-15 23:25:31 +0000 <a href="http://twitter.com/download/iphone" r... This is Roscoe. Another pupper fallen victim t... NaN NaN NaN https://twitter.com/dog_rates/status/886366144... 12 10 Roscoe None None pupper None
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2326 666411507551481857 NaN NaN 2015-11-17 00:24:19 +0000 <a href="http://twitter.com/download/iphone" r... This is quite the dog. Gets really excited whe... NaN NaN NaN https://twitter.com/dog_rates/status/666411507... 2 10 quite None None None None
2327 666407126856765440 NaN NaN 2015-11-17 00:06:54 +0000 <a href="http://twitter.com/download/iphone" r... This is a southern Vesuvius bumblegruff. Can d... NaN NaN NaN https://twitter.com/dog_rates/status/666407126... 7 10 a None None None None
2328 666396247373291520 NaN NaN 2015-11-16 23:23:41 +0000 <a href="http://twitter.com/download/iphone" r... Oh goodness. A super rare northeast Qdoba kang... NaN NaN NaN https://twitter.com/dog_rates/status/666396247... 9 10 None None None None None
2329 666373753744588802 NaN NaN 2015-11-16 21:54:18 +0000 <a href="http://twitter.com/download/iphone" r... Those are sunglasses and a jean jacket. 11/10 ... NaN NaN NaN https://twitter.com/dog_rates/status/666373753... 11 10 None None None None None
2330 666362758909284353 NaN NaN 2015-11-16 21:10:36 +0000 <a href="http://twitter.com/download/iphone" r... Unique dog here. Very small. Lives in containe... NaN NaN NaN https://twitter.com/dog_rates/status/666362758... 6 10 None None None None None
2331 666353288456101888 NaN NaN 2015-11-16 20:32:58 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a mixed Asiago from the Galápagos... NaN NaN NaN https://twitter.com/dog_rates/status/666353288... 8 10 None None None None None
2332 666345417576210432 NaN NaN 2015-11-16 20:01:42 +0000 <a href="http://twitter.com/download/iphone" r... Look at this jokester thinking seat belt laws ... NaN NaN NaN https://twitter.com/dog_rates/status/666345417... 10 10 None None None None None
2333 666337882303524864 NaN NaN 2015-11-16 19:31:45 +0000 <a href="http://twitter.com/download/iphone" r... This is an extremely rare horned Parthenon. No... NaN NaN NaN https://twitter.com/dog_rates/status/666337882... 9 10 an None None None None
2334 666293911632134144 NaN NaN 2015-11-16 16:37:02 +0000 <a href="http://twitter.com/download/iphone" r... This is a funny dog. Weird toes. Won't come do... NaN NaN NaN https://twitter.com/dog_rates/status/666293911... 3 10 a None None None None
2335 666287406224695296 NaN NaN 2015-11-16 16:11:11 +0000 <a href="http://twitter.com/download/iphone" r... This is an Albanian 3 1/2 legged Episcopalian... NaN NaN NaN https://twitter.com/dog_rates/status/666287406... 1 2 an None None None None
2336 666273097616637952 NaN NaN 2015-11-16 15:14:19 +0000 <a href="http://twitter.com/download/iphone" r... Can take selfies 11/10 https://t.co/ws2AMaNwPW NaN NaN NaN https://twitter.com/dog_rates/status/666273097... 11 10 None None None None None
2337 666268910803644416 NaN NaN 2015-11-16 14:57:41 +0000 <a href="http://twitter.com/download/iphone" r... Very concerned about fellow dog trapped in com... NaN NaN NaN https://twitter.com/dog_rates/status/666268910... 10 10 None None None None None
2338 666104133288665088 NaN NaN 2015-11-16 04:02:55 +0000 <a href="http://twitter.com/download/iphone" r... Not familiar with this breed. No tail (weird).... NaN NaN NaN https://twitter.com/dog_rates/status/666104133... 1 10 None None None None None
2339 666102155909144576 NaN NaN 2015-11-16 03:55:04 +0000 <a href="http://twitter.com/download/iphone" r... Oh my. Here you are seeing an Adobe Setter giv... NaN NaN NaN https://twitter.com/dog_rates/status/666102155... 11 10 None None None None None
2340 666099513787052032 NaN NaN 2015-11-16 03:44:34 +0000 <a href="http://twitter.com/download/iphone" r... Can stand on stump for what seems like a while... NaN NaN NaN https://twitter.com/dog_rates/status/666099513... 8 10 None None None None None
2341 666094000022159362 NaN NaN 2015-11-16 03:22:39 +0000 <a href="http://twitter.com/download/iphone" r... This appears to be a Mongolian Presbyterian mi... NaN NaN NaN https://twitter.com/dog_rates/status/666094000... 9 10 None None None None None
2342 666082916733198337 NaN NaN 2015-11-16 02:38:37 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a well-established sunblockerspan... NaN NaN NaN https://twitter.com/dog_rates/status/666082916... 6 10 None None None None None
2343 666073100786774016 NaN NaN 2015-11-16 01:59:36 +0000 <a href="http://twitter.com/download/iphone" r... Let's hope this flight isn't Malaysian (lol). ... NaN NaN NaN https://twitter.com/dog_rates/status/666073100... 10 10 None None None None None
2344 666071193221509120 NaN NaN 2015-11-16 01:52:02 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a northern speckled Rhododendron.... NaN NaN NaN https://twitter.com/dog_rates/status/666071193... 9 10 None None None None None
2345 666063827256086533 NaN NaN 2015-11-16 01:22:45 +0000 <a href="http://twitter.com/download/iphone" r... This is the happiest dog you will ever see. Ve... NaN NaN NaN https://twitter.com/dog_rates/status/666063827... 10 10 the None None None None
2346 666058600524156928 NaN NaN 2015-11-16 01:01:59 +0000 <a href="http://twitter.com/download/iphone" r... Here is the Rand Paul of retrievers folks! He'... NaN NaN NaN https://twitter.com/dog_rates/status/666058600... 8 10 the None None None None
2347 666057090499244032 NaN NaN 2015-11-16 00:55:59 +0000 <a href="http://twitter.com/download/iphone" r... My oh my. This is a rare blond Canadian terrie... NaN NaN NaN https://twitter.com/dog_rates/status/666057090... 9 10 a None None None None
2348 666055525042405380 NaN NaN 2015-11-16 00:49:46 +0000 <a href="http://twitter.com/download/iphone" r... Here is a Siberian heavily armored polar bear ... NaN NaN NaN https://twitter.com/dog_rates/status/666055525... 10 10 a None None None None
2349 666051853826850816 NaN NaN 2015-11-16 00:35:11 +0000 <a href="http://twitter.com/download/iphone" r... This is an odd dog. Hard on the outside but lo... NaN NaN NaN https://twitter.com/dog_rates/status/666051853... 2 10 an None None None None
2350 666050758794694657 NaN NaN 2015-11-16 00:30:50 +0000 <a href="http://twitter.com/download/iphone" r... This is a truly beautiful English Wilson Staff... NaN NaN NaN https://twitter.com/dog_rates/status/666050758... 10 10 a None None None None
2351 666049248165822465 NaN NaN 2015-11-16 00:24:50 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a 1949 1st generation vulpix. Enj... NaN NaN NaN https://twitter.com/dog_rates/status/666049248... 5 10 None None None None None
2352 666044226329800704 NaN NaN 2015-11-16 00:04:52 +0000 <a href="http://twitter.com/download/iphone" r... This is a purebred Piers Morgan. Loves to Netf... NaN NaN NaN https://twitter.com/dog_rates/status/666044226... 6 10 a None None None None
2353 666033412701032449 NaN NaN 2015-11-15 23:21:54 +0000 <a href="http://twitter.com/download/iphone" r... Here is a very happy pup. Big fan of well-main... NaN NaN NaN https://twitter.com/dog_rates/status/666033412... 9 10 a None None None None
2354 666029285002620928 NaN NaN 2015-11-15 23:05:30 +0000 <a href="http://twitter.com/download/iphone" r... This is a western brown Mitsubishi terrier. Up... NaN NaN NaN https://twitter.com/dog_rates/status/666029285... 7 10 a None None None None
2355 666020888022790149 NaN NaN 2015-11-15 22:32:08 +0000 <a href="http://twitter.com/download/iphone" r... Here we have a Japanese Irish Setter. Lost eye... NaN NaN NaN https://twitter.com/dog_rates/status/666020888... 8 10 None None None None None

2356 rows × 17 columns

In [6]:
dfTwitter_Archive.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2356 entries, 0 to 2355
Data columns (total 17 columns):
tweet_id                      2356 non-null int64
in_reply_to_status_id         78 non-null float64
in_reply_to_user_id           78 non-null float64
timestamp                     2356 non-null object
source                        2356 non-null object
text                          2356 non-null object
retweeted_status_id           181 non-null float64
retweeted_status_user_id      181 non-null float64
retweeted_status_timestamp    181 non-null object
expanded_urls                 2297 non-null object
rating_numerator              2356 non-null int64
rating_denominator            2356 non-null int64
name                          2356 non-null object
doggo                         2356 non-null object
floofer                       2356 non-null object
pupper                        2356 non-null object
puppo                         2356 non-null object
dtypes: float64(4), int64(3), object(10)
memory usage: 313.0+ KB
In [7]:
#通过观察发现了rating_denominator的值并不全部为10,还有其他数字
dfTwitter_Archive['rating_denominator'].value_counts()
Out[7]:
10     2333
11        3
50        3
80        2
20        2
2         1
16        1
40        1
70        1
15        1
90        1
110       1
120       1
130       1
150       1
170       1
7         1
0         1
Name: rating_denominator, dtype: int64
In [8]:
#通过观察发现了名字name列出现了诸如a,an,the的单词,这并不是正常的狗名字
dfTwitter_Archive['name'].value_counts().head(20)
Out[8]:
None       745
a           55
Charlie     12
Oliver      11
Lucy        11
Cooper      11
Lola        10
Penny       10
Tucker      10
Winston      9
Bo           9
Sadie        8
the          8
an           7
Toby         7
Bailey       7
Daisy        7
Buddy        7
Dave         6
Scout        6
Name: name, dtype: int64
In [9]:
#狗的地位有缺失,有些甚至还有两种地位
(dfTwitter_Archive.iloc[:,-4:]=='None').astype(int).sum(axis=1).value_counts()
Out[9]:
4    1976
3     366
2      14
dtype: int64
In [10]:
#狗的名字有大量缺失
(dfTwitter_Archive.loc[:,'name']=='None').astype(int).sum()
Out[10]:
745
In [11]:
dfTwitter_Archive['tweet_id'].duplicated().sum()
Out[11]:
0
In [12]:
dfImage_Predictions
Out[12]:
tweet_id jpg_url img_num p1 p1_conf p1_dog p2 p2_conf p2_dog p3 p3_conf p3_dog
0 666020888022790149 https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg 1 Welsh_springer_spaniel 0.465074 True collie 0.156665 True Shetland_sheepdog 0.061428 True
1 666029285002620928 https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg 1 redbone 0.506826 True miniature_pinscher 0.074192 True Rhodesian_ridgeback 0.072010 True
2 666033412701032449 https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg 1 German_shepherd 0.596461 True malinois 0.138584 True bloodhound 0.116197 True
3 666044226329800704 https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg 1 Rhodesian_ridgeback 0.408143 True redbone 0.360687 True miniature_pinscher 0.222752 True
4 666049248165822465 https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg 1 miniature_pinscher 0.560311 True Rottweiler 0.243682 True Doberman 0.154629 True
5 666050758794694657 https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg 1 Bernese_mountain_dog 0.651137 True English_springer 0.263788 True Greater_Swiss_Mountain_dog 0.016199 True
6 666051853826850816 https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg 1 box_turtle 0.933012 False mud_turtle 0.045885 False terrapin 0.017885 False
7 666055525042405380 https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg 1 chow 0.692517 True Tibetan_mastiff 0.058279 True fur_coat 0.054449 False
8 666057090499244032 https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg 1 shopping_cart 0.962465 False shopping_basket 0.014594 False golden_retriever 0.007959 True
9 666058600524156928 https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg 1 miniature_poodle 0.201493 True komondor 0.192305 True soft-coated_wheaten_terrier 0.082086 True
10 666063827256086533 https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg 1 golden_retriever 0.775930 True Tibetan_mastiff 0.093718 True Labrador_retriever 0.072427 True
11 666071193221509120 https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg 1 Gordon_setter 0.503672 True Yorkshire_terrier 0.174201 True Pekinese 0.109454 True
12 666073100786774016 https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg 1 Walker_hound 0.260857 True English_foxhound 0.175382 True Ibizan_hound 0.097471 True
13 666082916733198337 https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg 1 pug 0.489814 True bull_mastiff 0.404722 True French_bulldog 0.048960 True
14 666094000022159362 https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg 1 bloodhound 0.195217 True German_shepherd 0.078260 True malinois 0.075628 True
15 666099513787052032 https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg 1 Lhasa 0.582330 True Shih-Tzu 0.166192 True Dandie_Dinmont 0.089688 True
16 666102155909144576 https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg 1 English_setter 0.298617 True Newfoundland 0.149842 True borzoi 0.133649 True
17 666104133288665088 https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg 1 hen 0.965932 False cock 0.033919 False partridge 0.000052 False
18 666268910803644416 https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg 1 desktop_computer 0.086502 False desk 0.085547 False bookcase 0.079480 False
19 666273097616637952 https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg 1 Italian_greyhound 0.176053 True toy_terrier 0.111884 True basenji 0.111152 True
20 666287406224695296 https://pbs.twimg.com/media/CT8g3BpUEAAuFjg.jpg 1 Maltese_dog 0.857531 True toy_poodle 0.063064 True miniature_poodle 0.025581 True
21 666293911632134144 https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg 1 three-toed_sloth 0.914671 False otter 0.015250 False great_grey_owl 0.013207 False
22 666337882303524864 https://pbs.twimg.com/media/CT9OwFIWEAMuRje.jpg 1 ox 0.416669 False Newfoundland 0.278407 True groenendael 0.102643 True
23 666345417576210432 https://pbs.twimg.com/media/CT9Vn7PWoAA_ZCM.jpg 1 golden_retriever 0.858744 True Chesapeake_Bay_retriever 0.054787 True Labrador_retriever 0.014241 True
24 666353288456101888 https://pbs.twimg.com/media/CT9cx0tUEAAhNN_.jpg 1 malamute 0.336874 True Siberian_husky 0.147655 True Eskimo_dog 0.093412 True
25 666362758909284353 https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg 1 guinea_pig 0.996496 False skunk 0.002402 False hamster 0.000461 False
26 666373753744588802 https://pbs.twimg.com/media/CT9vZEYWUAAlZ05.jpg 1 soft-coated_wheaten_terrier 0.326467 True Afghan_hound 0.259551 True briard 0.206803 True
27 666396247373291520 https://pbs.twimg.com/media/CT-D2ZHWIAA3gK1.jpg 1 Chihuahua 0.978108 True toy_terrier 0.009397 True papillon 0.004577 True
28 666407126856765440 https://pbs.twimg.com/media/CT-NvwmW4AAugGZ.jpg 1 black-and-tan_coonhound 0.529139 True bloodhound 0.244220 True flat-coated_retriever 0.173810 True
29 666411507551481857 https://pbs.twimg.com/media/CT-RugiWIAELEaq.jpg 1 coho 0.404640 False barracouta 0.271485 False gar 0.189945 False
... ... ... ... ... ... ... ... ... ... ... ... ...
2045 886366144734445568 https://pbs.twimg.com/media/DE0BTnQUwAApKEH.jpg 1 French_bulldog 0.999201 True Chihuahua 0.000361 True Boston_bull 0.000076 True
2046 886680336477933568 https://pbs.twimg.com/media/DE4fEDzWAAAyHMM.jpg 1 convertible 0.738995 False sports_car 0.139952 False car_wheel 0.044173 False
2047 886736880519319552 https://pbs.twimg.com/media/DE5Se8FXcAAJFx4.jpg 1 kuvasz 0.309706 True Great_Pyrenees 0.186136 True Dandie_Dinmont 0.086346 True
2048 886983233522544640 https://pbs.twimg.com/media/DE8yicJW0AAAvBJ.jpg 2 Chihuahua 0.793469 True toy_terrier 0.143528 True can_opener 0.032253 False
2049 887101392804085760 https://pbs.twimg.com/media/DE-eAq6UwAA-jaE.jpg 1 Samoyed 0.733942 True Eskimo_dog 0.035029 True Staffordshire_bullterrier 0.029705 True
2050 887343217045368832 https://pbs.twimg.com/ext_tw_video_thumb/88734... 1 Mexican_hairless 0.330741 True sea_lion 0.275645 False Weimaraner 0.134203 True
2051 887473957103951883 https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg 2 Pembroke 0.809197 True Rhodesian_ridgeback 0.054950 True beagle 0.038915 True
2052 887517139158093824 https://pbs.twimg.com/ext_tw_video_thumb/88751... 1 limousine 0.130432 False tow_truck 0.029175 False shopping_cart 0.026321 False
2053 887705289381826560 https://pbs.twimg.com/media/DFHDQBbXgAEqY7t.jpg 1 basset 0.821664 True redbone 0.087582 True Weimaraner 0.026236 True
2054 888078434458587136 https://pbs.twimg.com/media/DFMWn56WsAAkA7B.jpg 1 French_bulldog 0.995026 True pug 0.000932 True bull_mastiff 0.000903 True
2055 888202515573088257 https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg 2 Pembroke 0.809197 True Rhodesian_ridgeback 0.054950 True beagle 0.038915 True
2056 888554962724278272 https://pbs.twimg.com/media/DFTH_O-UQAACu20.jpg 3 Siberian_husky 0.700377 True Eskimo_dog 0.166511 True malamute 0.111411 True
2057 888804989199671297 https://pbs.twimg.com/media/DFWra-3VYAA2piG.jpg 1 golden_retriever 0.469760 True Labrador_retriever 0.184172 True English_setter 0.073482 True
2058 888917238123831296 https://pbs.twimg.com/media/DFYRgsOUQAARGhO.jpg 1 golden_retriever 0.714719 True Tibetan_mastiff 0.120184 True Labrador_retriever 0.105506 True
2059 889278841981685760 https://pbs.twimg.com/ext_tw_video_thumb/88927... 1 whippet 0.626152 True borzoi 0.194742 True Saluki 0.027351 True
2060 889531135344209921 https://pbs.twimg.com/media/DFg_2PVW0AEHN3p.jpg 1 golden_retriever 0.953442 True Labrador_retriever 0.013834 True redbone 0.007958 True
2061 889638837579907072 https://pbs.twimg.com/media/DFihzFfXsAYGDPR.jpg 1 French_bulldog 0.991650 True boxer 0.002129 True Staffordshire_bullterrier 0.001498 True
2062 889665388333682689 https://pbs.twimg.com/media/DFi579UWsAAatzw.jpg 1 Pembroke 0.966327 True Cardigan 0.027356 True basenji 0.004633 True
2063 889880896479866881 https://pbs.twimg.com/media/DFl99B1WsAITKsg.jpg 1 French_bulldog 0.377417 True Labrador_retriever 0.151317 True muzzle 0.082981 False
2064 890006608113172480 https://pbs.twimg.com/media/DFnwSY4WAAAMliS.jpg 1 Samoyed 0.957979 True Pomeranian 0.013884 True chow 0.008167 True
2065 890240255349198849 https://pbs.twimg.com/media/DFrEyVuW0AAO3t9.jpg 1 Pembroke 0.511319 True Cardigan 0.451038 True Chihuahua 0.029248 True
2066 890609185150312448 https://pbs.twimg.com/media/DFwUU__XcAEpyXI.jpg 1 Irish_terrier 0.487574 True Irish_setter 0.193054 True Chesapeake_Bay_retriever 0.118184 True
2067 890729181411237888 https://pbs.twimg.com/media/DFyBahAVwAAhUTd.jpg 2 Pomeranian 0.566142 True Eskimo_dog 0.178406 True Pembroke 0.076507 True
2068 890971913173991426 https://pbs.twimg.com/media/DF1eOmZXUAALUcq.jpg 1 Appenzeller 0.341703 True Border_collie 0.199287 True ice_lolly 0.193548 False
2069 891087950875897856 https://pbs.twimg.com/media/DF3HwyEWsAABqE6.jpg 1 Chesapeake_Bay_retriever 0.425595 True Irish_terrier 0.116317 True Indian_elephant 0.076902 False
2070 891327558926688256 https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg 2 basset 0.555712 True English_springer 0.225770 True German_short-haired_pointer 0.175219 True
2071 891689557279858688 https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg 1 paper_towel 0.170278 False Labrador_retriever 0.168086 True spatula 0.040836 False
2072 891815181378084864 https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg 1 Chihuahua 0.716012 True malamute 0.078253 True kelpie 0.031379 True
2073 892177421306343426 https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg 1 Chihuahua 0.323581 True Pekinese 0.090647 True papillon 0.068957 True
2074 892420643555336193 https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg 1 orange 0.097049 False bagel 0.085851 False banana 0.076110 False

2075 rows × 12 columns

In [13]:
dfImage_Predictions.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075 entries, 0 to 2074
Data columns (total 12 columns):
tweet_id    2075 non-null int64
jpg_url     2075 non-null object
img_num     2075 non-null int64
p1          2075 non-null object
p1_conf     2075 non-null float64
p1_dog      2075 non-null bool
p2          2075 non-null object
p2_conf     2075 non-null float64
p2_dog      2075 non-null bool
p3          2075 non-null object
p3_conf     2075 non-null float64
p3_dog      2075 non-null bool
dtypes: bool(3), float64(3), int64(2), object(4)
memory usage: 152.1+ KB
In [14]:
dfImage_Predictions['jpg_url'].duplicated().sum()
Out[14]:
66
In [15]:
dfJson_Tweeter
Out[15]:
tweet_id retweet_count favorite_count
0 892420643555336193 8842 39492
1 892177421306343426 6480 33786
2 891815181378084864 4301 25445
3 891689557279858688 8925 42863
4 891327558926688256 9721 41016
5 891087950875897856 3240 20548
6 890971913173991426 2142 12053
7 890729181411237888 19548 66596
8 890609185150312448 4403 28187
9 890240255349198849 7684 32467
10 890006608113172480 7584 31127
11 889880896479866881 5116 28208
12 889665388333682689 8502 38745
13 889638837579907072 4705 27633
14 889531135344209921 2309 15329
15 889278841981685760 5635 25712
16 888917238123831296 4681 29555
17 888804989199671297 4535 26021
18 888554962724278272 3722 20267
19 888078434458587136 3637 22144
20 887705289381826560 5584 30690
21 887517139158093824 12053 46940
22 887473957103951883 18813 70007
23 887343217045368832 10713 34223
24 887101392804085760 6147 31045
25 886983233522544640 8045 35786
26 886736880519319552 3420 12286
27 886680336477933568 4597 22802
28 886366144734445568 3297 21488
29 886267009285017600 4 117
... ... ... ...
2322 666411507551481857 337 457
2323 666407126856765440 43 113
2324 666396247373291520 91 171
2325 666373753744588802 99 194
2326 666362758909284353 590 801
2327 666353288456101888 76 228
2328 666345417576210432 146 308
2329 666337882303524864 96 203
2330 666293911632134144 365 519
2331 666287406224695296 71 152
2332 666273097616637952 81 183
2333 666268910803644416 37 108
2334 666104133288665088 6835 14703
2335 666102155909144576 15 81
2336 666099513787052032 73 160
2337 666094000022159362 78 168
2338 666082916733198337 47 121
2339 666073100786774016 173 334
2340 666071193221509120 67 154
2341 666063827256086533 230 494
2342 666058600524156928 61 117
2343 666057090499244032 146 304
2344 666055525042405380 261 449
2345 666051853826850816 877 1250
2346 666050758794694657 60 136
2347 666049248165822465 41 111
2348 666044226329800704 147 309
2349 666033412701032449 47 128
2350 666029285002620928 48 132
2351 666020888022790149 530 2528

2352 rows × 3 columns

In [16]:
dfJson_Tweeter.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2352 entries, 0 to 2351
Data columns (total 3 columns):
tweet_id          2352 non-null int64
retweet_count     2352 non-null int64
favorite_count    2352 non-null int64
dtypes: int64(3)
memory usage: 73.5 KB
In [17]:
dfJson_Tweeter['tweet_id'].duplicated().sum()
Out[17]:
0

问题记录

质量问题

  • dfTwitter_Archive中包含了转发、回复的数据
  • dfTwitter_Archive的expanded_urls列有缺失
  • dfTwitter_Archive中,狗的地位有缺失,有些甚至还有两种地位
  • dfTwitter_Archive的name列含有a,an,the等异常名字
  • dfTwitter_Archive中,基准分rating_denominator列不全部为10
  • dfTwitter_Archive的source列包含了HTML格式的代码
  • dfImage_Predictions中图片链接jpg_url存在重复值
  • dfImage_Predictions中对于品种的写法不规范,存在大小写混搭,分隔有用空格、减号、下划线

整洁度问题

  • dfTwitter_Archive中,狗的“地位”使用了doggo,floofer,pupper,puppo四列进行统计
  • 三个Dataframe的观察对象相同,应放在同一Dataframe内

清理

In [18]:
#准备工作
dfTwitter_Archive_Clean = dfTwitter_Archive.copy()
dfImage_Predictions_Clean = dfImage_Predictions.copy()
dfJson_Tweeter_Clean = dfJson_Tweeter.copy()

dfTwitter_Archive中包含了转发、回复的数据

对转发、回复的数据执行删除操作

In [19]:
dfTwitter_Archive_Clean = dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['retweeted_status_id'].isnull()]
dfTwitter_Archive_Clean = dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['in_reply_to_user_id'].isnull()]
In [20]:
#测试
dfTwitter_Archive_Clean.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2097 entries, 0 to 2355
Data columns (total 17 columns):
tweet_id                      2097 non-null int64
in_reply_to_status_id         0 non-null float64
in_reply_to_user_id           0 non-null float64
timestamp                     2097 non-null object
source                        2097 non-null object
text                          2097 non-null object
retweeted_status_id           0 non-null float64
retweeted_status_user_id      0 non-null float64
retweeted_status_timestamp    0 non-null object
expanded_urls                 2094 non-null object
rating_numerator              2097 non-null int64
rating_denominator            2097 non-null int64
name                          2097 non-null object
doggo                         2097 non-null object
floofer                       2097 non-null object
pupper                        2097 non-null object
puppo                         2097 non-null object
dtypes: float64(4), int64(3), object(10)
memory usage: 294.9+ KB
In [21]:
#将多余列删除
dfTwitter_Archive_Clean.drop(['in_reply_to_status_id','in_reply_to_user_id','retweeted_status_id','retweeted_status_user_id','retweeted_status_timestamp'],axis=1,inplace=True)
dfTwitter_Archive_Clean.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2097 entries, 0 to 2355
Data columns (total 12 columns):
tweet_id              2097 non-null int64
timestamp             2097 non-null object
source                2097 non-null object
text                  2097 non-null object
expanded_urls         2094 non-null object
rating_numerator      2097 non-null int64
rating_denominator    2097 non-null int64
name                  2097 non-null object
doggo                 2097 non-null object
floofer               2097 non-null object
pupper                2097 non-null object
puppo                 2097 non-null object
dtypes: int64(3), object(9)
memory usage: 213.0+ KB

dfTwitter_Archive的expanded_urls列有缺失

将expanded_urls列缺失的数据删除

In [22]:
dfTwitter_Archive_Clean = dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['expanded_urls'].notnull()]
In [23]:
#测试
dfTwitter_Archive_Clean.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2094 entries, 0 to 2355
Data columns (total 12 columns):
tweet_id              2094 non-null int64
timestamp             2094 non-null object
source                2094 non-null object
text                  2094 non-null object
expanded_urls         2094 non-null object
rating_numerator      2094 non-null int64
rating_denominator    2094 non-null int64
name                  2094 non-null object
doggo                 2094 non-null object
floofer               2094 non-null object
pupper                2094 non-null object
puppo                 2094 non-null object
dtypes: int64(3), object(9)
memory usage: 212.7+ KB

dfTwitter_Archive中,狗的地位有缺失,有些甚至还有两种地位

dfTwitter_Archive中,狗的“地位”使用了doggo,floofer,pupper,puppo四列进行统计

从原始数据中,发现很多推文并未明确给出狗的地位,因此缺失的数据暂时没有办法填充

将doggo,floofer,pupper,puppo这4列合并,统计在新列stage,删除这4列

In [24]:
#查看原始推文
dfTwitter_Archive[(dfTwitter_Archive.iloc[:,-4:]=='None').astype(int).sum(axis=1)==4].sample(1).text
Out[24]:
1649    Meet Sebastian. He's a womanizer. Romantic af....
Name: text, dtype: object
In [25]:
#合并原四列
dfTwitter_Archive_Clean['stage'] = dfTwitter_Archive_Clean['doggo']+dfTwitter_Archive_Clean['floofer']+dfTwitter_Archive_Clean['pupper']+dfTwitter_Archive_Clean['puppo']
dfTwitter_Archive_Clean['stage'] = dfTwitter_Archive_Clean['stage'].str.replace('None','')
dfTwitter_Archive_Clean = dfTwitter_Archive_Clean.replace(({'stage':{'':np.nan}}))

#删除四列
dfTwitter_Archive_Clean.drop(['doggo','floofer','pupper','puppo'],axis=1,inplace=True)
dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['stage'].notnull()]
Out[25]:
tweet_id timestamp source text expanded_urls rating_numerator rating_denominator name stage
9 890240255349198849 2017-07-26 15:59:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Cassie. She is a college pup. Studying... https://twitter.com/dog_rates/status/890240255... 14 10 Cassie doggo
12 889665388333682689 2017-07-25 01:55:32 +0000 <a href="http://twitter.com/download/iphone" r... Here's a puppo that seems to be on the fence a... https://twitter.com/dog_rates/status/889665388... 13 10 None puppo
14 889531135344209921 2017-07-24 17:02:04 +0000 <a href="http://twitter.com/download/iphone" r... This is Stuart. He's sporting his favorite fan... https://twitter.com/dog_rates/status/889531135... 13 10 Stuart puppo
29 886366144734445568 2017-07-15 23:25:31 +0000 <a href="http://twitter.com/download/iphone" r... This is Roscoe. Another pupper fallen victim t... https://twitter.com/dog_rates/status/886366144... 12 10 Roscoe pupper
43 884162670584377345 2017-07-09 21:29:42 +0000 <a href="http://twitter.com/download/iphone" r... Meet Yogi. He doesn't have any important dog m... https://twitter.com/dog_rates/status/884162670... 12 10 Yogi doggo
46 883360690899218434 2017-07-07 16:22:55 +0000 <a href="http://twitter.com/download/iphone" r... Meet Grizzwald. He may be the floofiest floofe... https://twitter.com/dog_rates/status/883360690... 13 10 Grizzwald floofer
49 882762694511734784 2017-07-06 00:46:41 +0000 <a href="http://twitter.com/download/iphone" r... This is Gus. He's quite the cheeky pupper. Alr... https://twitter.com/dog_rates/status/882762694... 12 10 Gus pupper
56 881536004380872706 2017-07-02 15:32:16 +0000 <a href="http://twitter.com/download/iphone" r... Here is a pupper approaching maximum borkdrive... https://twitter.com/dog_rates/status/881536004... 14 10 a pupper
71 878776093423087618 2017-06-25 00:45:22 +0000 <a href="http://twitter.com/download/iphone" r... This is Snoopy. He's a proud #PrideMonthPuppo.... https://twitter.com/dog_rates/status/878776093... 13 10 Snoopy puppo
82 876838120628539392 2017-06-19 16:24:33 +0000 <a href="http://twitter.com/download/iphone" r... This is Ginger. She's having a ruff Monday. To... https://twitter.com/dog_rates/status/876838120... 12 10 Ginger pupper
92 874296783580663808 2017-06-12 16:06:11 +0000 <a href="http://twitter.com/download/iphone" r... This is Jed. He may be the fanciest pupper in ... https://twitter.com/dog_rates/status/874296783... 13 10 Jed pupper
94 874012996292530176 2017-06-11 21:18:31 +0000 <a href="http://twitter.com/download/iphone" r... This is Sebastian. He can't see all the colors... https://twitter.com/dog_rates/status/874012996... 13 10 Sebastian puppo
98 873213775632977920 2017-06-09 16:22:42 +0000 <a href="http://twitter.com/download/iphone" r... This is Sierra. She's one precious pupper. Abs... https://www.gofundme.com/help-my-baby-sierra-g... 12 10 Sierra pupper
99 872967104147763200 2017-06-09 00:02:31 +0000 <a href="http://twitter.com/download/iphone" r... Here's a very large dog. He has a date later. ... https://twitter.com/dog_rates/status/872967104... 12 10 None doggo
107 871762521631449091 2017-06-05 16:15:56 +0000 <a href="http://twitter.com/download/iphone" r... This is Rover. As part of pupper protocol he h... https://twitter.com/dog_rates/status/871762521... 12 10 Rover pupper
108 871515927908634625 2017-06-04 23:56:03 +0000 <a href="http://twitter.com/download/iphone" r... This is Napolean. He's a Raggedy East Nicaragu... https://twitter.com/dog_rates/status/871515927... 12 10 Napolean doggo
110 871102520638267392 2017-06-03 20:33:19 +0000 <a href="http://twitter.com/download/iphone" r... Never doubt a doggo 14/10 https://t.co/AbBLh2FZCH https://twitter.com/animalcog/status/871075758... 14 10 None doggo
121 869596645499047938 2017-05-30 16:49:31 +0000 <a href="http://twitter.com/download/iphone" r... This is Scout. He just graduated. Officially a... https://twitter.com/dog_rates/status/869596645... 12 10 Scout doggo
129 867421006826221569 2017-05-24 16:44:18 +0000 <a href="http://twitter.com/download/iphone" r... This is Shikha. She just watched you drop a sk... https://twitter.com/dog_rates/status/867421006... 12 10 Shikha puppo
135 866450705531457537 2017-05-22 00:28:40 +0000 <a href="http://twitter.com/download/iphone" r... This is Jamesy. He gives a kiss to every other... https://twitter.com/dog_rates/status/866450705... 13 10 Jamesy pupper
168 859607811541651456 2017-05-03 03:17:27 +0000 <a href="http://twitter.com/download/iphone" r... Sorry for the lack of posts today. I came home... https://twitter.com/dog_rates/status/859607811... 13 10 None puppo
172 858843525470990336 2017-05-01 00:40:27 +0000 <a href="http://twitter.com/download/iphone" r... I have stumbled puppon a doggo painting party.... https://twitter.com/dog_rates/status/858843525... 13 10 None doggo
191 855851453814013952 2017-04-22 18:31:02 +0000 <a href="http://twitter.com/download/iphone" r... Here's a puppo participating in the #ScienceMa... https://twitter.com/dog_rates/status/855851453... 13 10 None doggopuppo
199 854120357044912130 2017-04-17 23:52:16 +0000 <a href="http://twitter.com/download/iphone" r... Sometimes you guys remind me just how impactfu... https://twitter.com/dog_rates/status/854120357... 14 10 None pupper
200 854010172552949760 2017-04-17 16:34:26 +0000 <a href="http://twitter.com/download/iphone" r... At first I thought this was a shy doggo, but i... https://twitter.com/dog_rates/status/854010172... 11 10 None doggofloofer
220 850019790995546112 2017-04-06 16:18:05 +0000 <a href="http://twitter.com/download/iphone" r... Say hello to Boomer. He's a sandy pupper. Havi... https://twitter.com/dog_rates/status/850019790... 12 10 Boomer pupper
240 846514051647705089 2017-03-28 00:07:32 +0000 <a href="http://twitter.com/download/iphone" r... This is Barney. He's an elder doggo. Hitches a... https://twitter.com/dog_rates/status/846514051... 13 10 Barney doggo
248 845397057150107648 2017-03-24 22:08:59 +0000 <a href="http://twitter.com/download/iphone" r... Say hello to Mimosa. She's an emotional suppor... https://www.gofundme.com/help-save-a-pup,https... 13 10 Mimosa doggo
249 845306882940190720 2017-03-24 16:10:40 +0000 <a href="http://twitter.com/download/iphone" r... This is Pickles. She's a silly pupper. Thinks ... https://twitter.com/dog_rates/status/845306882... 12 10 Pickles pupper
293 837820167694528512 2017-03-04 00:21:08 +0000 <a href="http://twitter.com/download/iphone" r... Here's a pupper before and after being asked "... https://twitter.com/dog_rates/status/837820167... 12 10 None pupper
... ... ... ... ... ... ... ... ... ...
1875 675113801096802304 2015-12-11 00:44:07 +0000 <a href="http://twitter.com/download/iphone" r... Meet Zuzu. He just graduated college. Astute p... https://twitter.com/dog_rates/status/675113801... 10 10 Zuzu pupper
1880 675006312288268288 2015-12-10 17:37:00 +0000 <a href="http://twitter.com/download/iphone" r... Say hello to Mollie. This pic was taken after ... https://twitter.com/dog_rates/status/675006312... 10 10 Mollie pupper
1889 674774481756377088 2015-12-10 02:15:47 +0000 <a href="http://twitter.com/download/iphone" r... This is Superpup. His head isn't proportional ... https://twitter.com/dog_rates/status/674774481... 11 10 Superpup pupper
1897 674737130913071104 2015-12-09 23:47:22 +0000 <a href="http://twitter.com/download/iphone" r... Meet Rufio. He is unaware of the pink legless ... https://twitter.com/dog_rates/status/674737130... 10 10 Rufio pupper
1903 674638615994089473 2015-12-09 17:15:54 +0000 <a href="http://twitter.com/download/iphone" r... This pupper is fed up with being tickled. 12/1... https://twitter.com/dog_rates/status/674638615... 12 10 None pupper
1907 674447403907457024 2015-12-09 04:36:06 +0000 <a href="http://twitter.com/download/iphone" r... This pupper just wants a belly rub. This puppe... https://twitter.com/dog_rates/status/674447403... 10 10 None pupper
1915 674318007229923329 2015-12-08 20:01:55 +0000 <a href="http://twitter.com/download/iphone" r... This is Lennon. He's in quite the predicament.... https://twitter.com/dog_rates/status/674318007... 8 10 Lennon pupper
1921 674262580978937856 2015-12-08 16:21:41 +0000 <a href="http://twitter.com/download/iphone" r... This is Gus. He's super stoked about being an ... https://twitter.com/dog_rates/status/674262580... 9 10 Gus pupper
1930 674038233588723717 2015-12-08 01:30:12 +0000 <a href="http://twitter.com/download/iphone" r... This is Kaiya. She's an aspiring shoe model. 1... https://twitter.com/dog_rates/status/674038233... 12 10 Kaiya pupper
1936 673956914389192708 2015-12-07 20:07:04 +0000 <a href="http://twitter.com/download/iphone" r... This is one esteemed pupper. Just graduated co... https://twitter.com/dog_rates/status/673956914... 10 10 one pupper
1937 673919437611909120 2015-12-07 17:38:09 +0000 <a href="http://twitter.com/download/iphone" r... This is Obie. He is on guard watching for evil... https://twitter.com/dog_rates/status/673919437... 11 10 Obie pupper
1945 673707060090052608 2015-12-07 03:34:14 +0000 <a href="http://twitter.com/download/iphone" r... This is Raymond. He's absolutely terrified of ... https://twitter.com/dog_rates/status/673707060... 10 10 Raymond pupper
1948 673697980713705472 2015-12-07 02:58:09 +0000 <a href="http://twitter.com/download/iphone" r... This is Pickles. She's a tiny pointy pupper. A... https://twitter.com/dog_rates/status/673697980... 8 10 Pickles pupper
1954 673656262056419329 2015-12-07 00:12:23 +0000 <a href="http://twitter.com/download/iphone" r... This is Albert AKA King Banana Peel. He's a ki... https://twitter.com/dog_rates/status/673656262... 10 10 Albert pupper
1956 673612854080196609 2015-12-06 21:19:54 +0000 <a href="http://twitter.com/download/iphone" r... This is Jeffri. He's a speckled ice pupper. Ve... https://twitter.com/dog_rates/status/673612854... 7 10 Jeffri pupper
1960 673363615379013632 2015-12-06 04:49:31 +0000 <a href="http://twitter.com/download/iphone" r... This little pupper can't wait for Christmas. H... https://twitter.com/dog_rates/status/673363615... 11 10 None pupper
1967 673342308415348736 2015-12-06 03:24:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Django. He's a skilled assassin pupper... https://twitter.com/dog_rates/status/673342308... 10 10 Django pupper
1970 673295268553605120 2015-12-06 00:17:55 +0000 <a href="http://twitter.com/download/iphone" r... Meet Eve. She's a raging alcoholic 8/10 (would... https://twitter.com/dog_rates/status/673295268... 8 10 Eve pupper
1974 673148804208660480 2015-12-05 14:35:56 +0000 <a href="http://twitter.com/download/iphone" r... This is Fletcher. He's had a ruff night. No mo... https://twitter.com/dog_rates/status/673148804... 8 10 Fletcher pupper
1977 672988786805112832 2015-12-05 04:00:04 +0000 <a href="http://twitter.com/download/iphone" r... This is Schnozz. He's had a blurred tail since... https://twitter.com/dog_rates/status/672988786... 10 10 Schnozz pupper
1980 672975131468300288 2015-12-05 03:05:49 +0000 <a href="http://twitter.com/download/iphone" r... This is Chuckles. He is one skeptical pupper. ... https://twitter.com/dog_rates/status/672975131... 10 10 Chuckles pupper
1981 672970152493887488 2015-12-05 02:46:02 +0000 <a href="http://twitter.com/download/iphone" r... This is Chet. He's having a hard time. Really ... https://twitter.com/dog_rates/status/672970152... 7 10 Chet pupper
1985 672898206762672129 2015-12-04 22:00:08 +0000 <a href="http://twitter.com/download/iphone" r... This is Cheryl AKA Queen Pupper of the Skies. ... https://twitter.com/dog_rates/status/672898206... 11 10 Cheryl pupper
1991 672622327801233409 2015-12-04 03:43:54 +0000 <a href="http://twitter.com/download/iphone" r... This lil pupper is sad because we haven't foun... https://twitter.com/dog_rates/status/672622327... 12 10 None pupper
1992 672614745925664768 2015-12-04 03:13:46 +0000 <a href="http://twitter.com/download/iphone" r... This is Norman. Doesn't bark much. Very docile... https://twitter.com/dog_rates/status/672614745... 6 10 Norman pupper
1995 672594978741354496 2015-12-04 01:55:13 +0000 <a href="http://twitter.com/download/iphone" r... Meet Scott. Just trying to catch his train to ... https://twitter.com/dog_rates/status/672594978... 9 10 Scott pupper
2002 672481316919734272 2015-12-03 18:23:34 +0000 <a href="http://twitter.com/download/iphone" r... Say hello to Jazz. She should be on the cover ... https://twitter.com/dog_rates/status/672481316... 12 10 Jazz pupper
2009 672254177670729728 2015-12-03 03:21:00 +0000 <a href="http://twitter.com/download/iphone" r... This is Rolf. He's having the time of his life... https://twitter.com/dog_rates/status/672254177... 11 10 Rolf pupper
2015 672205392827572224 2015-12-03 00:07:09 +0000 <a href="http://twitter.com/download/iphone" r... This is Opal. He's a Royal John Coctostan. Rea... https://twitter.com/dog_rates/status/672205392... 9 10 Opal pupper
2017 672160042234327040 2015-12-02 21:06:56 +0000 <a href="http://twitter.com/download/iphone" r... This is Bubba. He's a Titted Peebles Aorta. Ev... https://twitter.com/dog_rates/status/672160042... 8 10 Bubba pupper

335 rows × 9 columns

In [26]:
#测试
dfTwitter_Archive_Clean['stage'].value_counts()
Out[26]:
pupper          220
doggo            72
puppo            23
floofer           9
doggopupper       9
doggofloofer      1
doggopuppo        1
Name: stage, dtype: int64

dfTwitter_Archive的name列含有a,an,the等异常名字

推测是取名字的时候,直接取了this is后面的第一个单词

通过正则表达式从推文中重新获取

In [27]:
dfTwitter_Archive_Clean['name'] = dfTwitter_Archive_Clean['text'].str.extract(r'\S*[This is|Here is|Here\'s|named|Meet|Say hello to|Here we have]\s([A-Z][a-z]+).+',expand=True)
In [28]:
#测试
dfTwitter_Archive_Clean['name'].value_counts()
Out[28]:
Charlie           11
Lucy              11
Oliver            11
Cooper            10
Tucker             9
Penny              9
Lola               8
Winston            8
Christmas          8
Sadie              8
Toby               8
Bo                 7
Daisy              7
Jax                6
Oscar              6
Bailey             6
Koda               6
Bella              6
Stanley            6
Rusty              5
Leo                5
Bentley            5
Louis              5
Chester            5
Dave               5
Zoey               5
Boomer             5
Milo               5
Buddy              5
Scout              5
                  ..
Hercules           1
Piers              1
Emma               1
Pixar              1
Carbon             1
Venezuelan         1
Fabio              1
Striker            1
Skittles           1
Geno               1
Teddy              1
Michelangelope     1
Forrest            1
Harper             1
Gromit             1
Parthenon          1
Am                 1
Cermet             1
Valentine          1
Apollo             1
Skye               1
Margo              1
Tuscaloosa         1
Bowie              1
Simba              1
Ralpher            1
Stark              1
Mitsubishi         1
Karll              1
Bobb               1
Name: name, Length: 1063, dtype: int64

dfTwitter_Archive中,基准分rating_denominator列不全部为10

从原始数据中重新提取。对于一条推特中存在两条评分记录的情况,取前一组评分。

In [29]:
dfTwitter_Archive_Clean.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2094 entries, 0 to 2355
Data columns (total 9 columns):
tweet_id              2094 non-null int64
timestamp             2094 non-null object
source                2094 non-null object
text                  2094 non-null object
expanded_urls         2094 non-null object
rating_numerator      2094 non-null int64
rating_denominator    2094 non-null int64
name                  1566 non-null object
stage                 335 non-null object
dtypes: int64(3), object(6)
memory usage: 163.6+ KB
In [30]:
dfTwitter_Archive_Clean['rating_numerator'],dfTwitter_Archive_Clean['rating_denominator'] = dfTwitter_Archive_Clean['text'].str.extract(r'([0-9]+\.?[0-9]*\/[0-9]+0)',expand=True)[0].str.split("/",1).str

#dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['rating_numerator'].str.find('.')!=-1]

#其中有一条特殊情况,评分为24/7,手工修改数据
modified_index = dfTwitter_Archive_Clean[dfTwitter_Archive_Clean['rating_numerator'].isnull()].index[0]
dfTwitter_Archive_Clean.loc[modified_index,'rating_numerator']=24
dfTwitter_Archive_Clean.loc[modified_index,'rating_denominator']=7

#修改类型为float
dfTwitter_Archive_Clean['rating_numerator'] = dfTwitter_Archive_Clean['rating_numerator'].astype(float)
dfTwitter_Archive_Clean['rating_denominator'] = dfTwitter_Archive_Clean['rating_denominator'].astype(float)
In [31]:
#测试
dfTwitter_Archive_Clean['rating_denominator'].value_counts()
Out[31]:
10.0     2080
50.0        3
80.0        2
150.0       1
110.0       1
90.0        1
70.0        1
170.0       1
120.0       1
40.0        1
20.0        1
7.0         1
Name: rating_denominator, dtype: int64
In [32]:
dfTwitter_Archive_Clean['rating_numerator'].value_counts()
Out[32]:
12.00      485
10.00      435
11.00      413
13.00      287
9.00       153
8.00        98
7.00        51
14.00       39
5.00        33
6.00        32
3.00        19
4.00        16
2.00         9
1.00         4
13.50        1
0.00         1
24.00        1
84.00        1
420.00       1
1776.00      1
80.00        1
60.00        1
44.00        1
144.00       1
88.00        1
11.26        1
11.27        1
121.00       1
9.75         1
99.00        1
204.00       1
45.00        1
165.00       1
50.00        1
Name: rating_numerator, dtype: int64

dfTwitter_Archive的source列包含了HTML格式的代码

使用正则表达式提取URL

In [33]:
dfTwitter_Archive_Clean['source'] = dfTwitter_Archive_Clean['source'].str.extract(r'>(.+)<',expand=True)
In [34]:
#测试
dfTwitter_Archive_Clean.head()
Out[34]:
tweet_id timestamp source text expanded_urls rating_numerator rating_denominator name stage
0 892420643555336193 2017-08-01 16:23:56 +0000 Twitter for iPhone This is Phineas. He's a mystical boy. Only eve... https://twitter.com/dog_rates/status/892420643... 13.0 10.0 Phineas NaN
1 892177421306343426 2017-08-01 00:17:27 +0000 Twitter for iPhone This is Tilly. She's just checking pup on you.... https://twitter.com/dog_rates/status/892177421... 13.0 10.0 Tilly NaN
2 891815181378084864 2017-07-31 00:18:03 +0000 Twitter for iPhone This is Archie. He is a rare Norwegian Pouncin... https://twitter.com/dog_rates/status/891815181... 12.0 10.0 Archie NaN
3 891689557279858688 2017-07-30 15:58:51 +0000 Twitter for iPhone This is Darla. She commenced a snooze mid meal... https://twitter.com/dog_rates/status/891689557... 13.0 10.0 Darla NaN
4 891327558926688256 2017-07-29 16:00:24 +0000 Twitter for iPhone This is Franklin. He would like you to stop ca... https://twitter.com/dog_rates/status/891327558... 12.0 10.0 Franklin NaN

dfImage_Predictions中图片链接jpg_url存在重复值

删除这些重复值

In [35]:
dfImage_Predictions_Clean.drop_duplicates(subset='jpg_url',inplace=True)
In [36]:
#测试
dfImage_Predictions_Clean['jpg_url'].duplicated().sum()
Out[36]:
0

dfImage_Predictions中对于品种的写法不规范,存在大小写混搭,分隔有用空格、减号、下划线

字母全部使用小写。分隔则全部使用下划线。

In [37]:
dfImage_Predictions_Clean[['p1','p2','p3']] = dfImage_Predictions_Clean[['p1','p2','p3']].applymap(str.lower)
dfImage_Predictions_Clean[['p1','p2','p3']] = dfImage_Predictions_Clean[['p1','p2','p3']].replace(' ','_').replace('-','_')
In [38]:
#测试
dfImage_Predictions_Clean.head(5)
Out[38]:
tweet_id jpg_url img_num p1 p1_conf p1_dog p2 p2_conf p2_dog p3 p3_conf p3_dog
0 666020888022790149 https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg 1 welsh_springer_spaniel 0.465074 True collie 0.156665 True shetland_sheepdog 0.061428 True
1 666029285002620928 https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg 1 redbone 0.506826 True miniature_pinscher 0.074192 True rhodesian_ridgeback 0.072010 True
2 666033412701032449 https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg 1 german_shepherd 0.596461 True malinois 0.138584 True bloodhound 0.116197 True
3 666044226329800704 https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg 1 rhodesian_ridgeback 0.408143 True redbone 0.360687 True miniature_pinscher 0.222752 True
4 666049248165822465 https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg 1 miniature_pinscher 0.560311 True rottweiler 0.243682 True doberman 0.154629 True

三个Dataframe的观察对象相同,应放在同一Dataframe内

使用merge合并三个Dataframe

In [39]:
#合并Dataframe
dfClean = pd.merge(dfTwitter_Archive_Clean,dfImage_Predictions_Clean,how='inner',on='tweet_id').merge(dfJson_Tweeter_Clean,how='left',on='tweet_id')
In [40]:
dfClean.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1971 entries, 0 to 1970
Data columns (total 22 columns):
tweet_id              1971 non-null int64
timestamp             1971 non-null object
source                1971 non-null object
text                  1971 non-null object
expanded_urls         1971 non-null object
rating_numerator      1971 non-null float64
rating_denominator    1971 non-null float64
name                  1520 non-null object
stage                 303 non-null object
jpg_url               1971 non-null object
img_num               1971 non-null int64
p1                    1971 non-null object
p1_conf               1971 non-null float64
p1_dog                1971 non-null bool
p2                    1971 non-null object
p2_conf               1971 non-null float64
p2_dog                1971 non-null bool
p3                    1971 non-null object
p3_conf               1971 non-null float64
p3_dog                1971 non-null bool
retweet_count         1971 non-null int64
favorite_count        1971 non-null int64
dtypes: bool(3), float64(5), int64(4), object(10)
memory usage: 313.7+ KB
In [41]:
dfClean.tail()
Out[41]:
tweet_id timestamp source text expanded_urls rating_numerator rating_denominator name stage jpg_url ... p1_conf p1_dog p2 p2_conf p2_dog p3 p3_conf p3_dog retweet_count favorite_count
1966 666049248165822465 2015-11-16 00:24:50 +0000 Twitter for iPhone Here we have a 1949 1st generation vulpix. Enj... https://twitter.com/dog_rates/status/666049248... 5.0 10.0 Fox NaN https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg ... 0.560311 True rottweiler 0.243682 True doberman 0.154629 True 41 111
1967 666044226329800704 2015-11-16 00:04:52 +0000 Twitter for iPhone This is a purebred Piers Morgan. Loves to Netf... https://twitter.com/dog_rates/status/666044226... 6.0 10.0 Piers NaN https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg ... 0.408143 True redbone 0.360687 True miniature_pinscher 0.222752 True 147 309
1968 666033412701032449 2015-11-15 23:21:54 +0000 Twitter for iPhone Here is a very happy pup. Big fan of well-main... https://twitter.com/dog_rates/status/666033412... 9.0 10.0 NaN NaN https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg ... 0.596461 True malinois 0.138584 True bloodhound 0.116197 True 47 128
1969 666029285002620928 2015-11-15 23:05:30 +0000 Twitter for iPhone This is a western brown Mitsubishi terrier. Up... https://twitter.com/dog_rates/status/666029285... 7.0 10.0 Mitsubishi NaN https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg ... 0.506826 True miniature_pinscher 0.074192 True rhodesian_ridgeback 0.072010 True 48 132
1970 666020888022790149 2015-11-15 22:32:08 +0000 Twitter for iPhone Here we have a Japanese Irish Setter. Lost eye... https://twitter.com/dog_rates/status/666020888... 8.0 10.0 Japanese NaN https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg ... 0.465074 True collie 0.156665 True shetland_sheepdog 0.061428 True 530 2528

5 rows × 22 columns

In [42]:
#保存文件
dfClean.to_csv('twitter_archive_master.csv', index=False)

数据分析

提出问题:

  • 1、哪些名字在狗起名之中比较热门?
  • 2、哪些品种的狗能获得较多的转发和点赞?
  • 3、狗的评分是否会影响转发与点赞?

1、哪些名字在狗起名之中比较热门?

In [43]:
#取姓名的排名
dfNameRanking = pd.DataFrame(dfClean.groupby('name',as_index=False)['tweet_id'].count().sort_values(by='tweet_id',ascending=False))
dfNameRanking = dfNameRanking.rename(columns={'tweet_id':'counts'})
dfNameRanking.reset_index(drop=True,inplace=True)
dfNameRanking.head(10)
Out[43]:
name counts
0 Oliver 11
1 Charlie 11
2 Cooper 10
3 Lucy 10
4 Penny 9
5 Tucker 9
6 Toby 8
7 Winston 8
8 Sadie 8
9 Christmas 8
In [44]:
lbName = np.full(len(dfNameRanking.name.value_counts()),"",dtype=object)
lbName[0]='Oliver'
lbName[1]='Charlie'
dfNameRanking.counts.plot(kind='pie',labels=lbName);

结论:狗的名字五花八门,但是像Charlie和Oliver这样的名字是比较热门的。

2、哪些品种的狗能获得较多的转发和点赞?

In [45]:
#选取所有狗的品种
dfVariety = pd.DataFrame(columns=['tweet_id','variety'])
for i in dfClean.index:
    if dfClean.p1_dog.loc[i] == True:
        dfVariety = dfVariety.append({'tweet_id':str(dfClean.tweet_id.loc[i]),'variety':dfClean.p1.loc[i]},ignore_index=True)
    elif dfClean.p2_dog.loc[i] == True:
        dfVariety = dfVariety.append({'tweet_id':str(dfClean.tweet_id.loc[i]),'variety':dfClean.p2.loc[i]},ignore_index=True)
    elif dfClean.p3_dog.loc[i] == True:
        dfVariety = dfVariety.append({'tweet_id':str(dfClean.tweet_id.loc[i]),'variety':dfClean.p3.loc[i]},ignore_index=True)
    else:
        dfVariety = dfVariety.append({'tweet_id':str(dfClean.tweet_id.loc[i]),'variety':np.nan},ignore_index=True)

dfVariety['tweet_id'] = dfVariety['tweet_id'].astype(int)
#去除空值
dfVariety.dropna(subset=['variety'],inplace=True)
        
#合并获得转发和点赞
dfVariety = pd.merge(dfVariety,dfClean[['tweet_id','retweet_count','favorite_count']],how='left',on='tweet_id')
dfVariety.head()
Out[45]:
tweet_id variety retweet_count favorite_count
0 892177421306343426 chihuahua 6480 33786
1 891815181378084864 chihuahua 4301 25445
2 891689557279858688 labrador_retriever 8925 42863
3 891327558926688256 basset 9721 41016
4 891087950875897856 chesapeake_bay_retriever 3240 20548
In [46]:
#计算种类的排名,通过转推和喜欢数相加来计算
dfRanking = dfVariety.groupby('variety').sum().dropna()[['retweet_count','favorite_count']]
dfRanking['total'] = dfRanking['retweet_count']+dfRanking['favorite_count']

dfCountTweet = dfVariety.groupby('variety').size().rename('counts')

dfTempRanking = dfRanking.join(dfCountTweet).sort_values(by='total',ascending=False).head(10)
#dfTempRanking.reset_index(drop=True,inplace=True)
dfTempRanking
Out[46]:
retweet_count favorite_count total counts
variety
golden_retriever 589274.0 1953598.0 2542872.0 156
labrador_retriever 403152.0 1260635.0 1663787.0 106
pembroke 290323.0 1035633.0 1325956.0 94
chihuahua 252834.0 753813.0 1006647.0 90
samoyed 202313.0 582082.0 784395.0 42
french_bulldog 155290.0 568978.0 724268.0 31
chow 133512.0 456699.0 590211.0 48
cocker_spaniel 147681.0 413968.0 561649.0 30
pug 118051.0 382463.0 500514.0 62
malamute 108983.0 350710.0 459693.0 33
In [47]:
dfTempRanking['total'].plot(kind='bar',figsize=(8,5),color='#e597b2');
In [48]:
dfTempRanking['counts'].plot(kind='bar',figsize=(8,5),color='#00a3af');

结论:根据转发和喜欢数,可以看出金毛寻回犬(golden_retriever)、拉布拉多(Labrador_retriever)和彭布罗克威尔士柯基犬(Pembroke)占据了热门中的前三。看起来人们比较喜欢容易养并且外观可爱的狗。

3、狗的评分是否会影响转发与点赞?

In [49]:
#通过回归来探索相关性
#因为存在不为10的基准评分(如多条狗的情况),通过rating_numerator / rating_denominator来计算
dfTempRate = dfClean[['rating_numerator','rating_denominator','retweet_count','favorite_count']].dropna()
dfTempRate['Rate']=dfTempRate['rating_numerator'] / dfTempRate['rating_denominator']

#去掉异常值
dfTempRate = dfTempRate[dfTempRate['Rate']<2]

sns.lmplot(x='Rate', y='favorite_count',data=dfTempRate,size=7);
In [50]:
sns.lmplot(x='Rate', y='retweet_count',data=dfTempRate,size=7);

结论:通过回归发现,推主的评分在一定程度上对转发和喜欢数有正相关的影响。

In [ ]: