A simple web crawler project.
It fetches all payment records from Pianity, a music NFT platform. If anything is unclear, feel free to contact me via Twitter DM.
The Python code is below:
# -*- coding: utf-8 -*-
# @Time : 2022/2/22 22:06
# @Author : uzan
# @FileName: pianity.py
import csv
from datetime import datetime
import json
import locale
import os
import sys
import time
import requests
def get_data_to_file(dir_path):
    """Download payment pages from the remote API and dump each page to disk.

    Pages of 50 records are requested until the server replies with a
    non-200 status; page *i* is saved as ``<dir_path>/data<start>-<end>.json``.

    :param dir_path: directory that receives the JSON page files
    """
    os.makedirs(dir_path, exist_ok=True)  # ensure the target directory exists
    page = 0
    while True:
        status_code, content = fetch_data_from_remote_db(50 * page)
        if status_code != 200:
            break  # server refused / no more data
        start = 50 * page + 1
        end = 50 * page + 50
        with open(f"{dir_path}/data{start}-{end}.json", "w", encoding="utf-8") as f:
            f.write(content)
        print(f"======from {start}---to---{end}")
        print(status_code)
        print(content)
        page += 1
        # pause between requests to avoid being rate limited / blacklisted
        time.sleep(5)
def fetch_data_from_remote_db(skip):
    """
    Send a POST request wrapping a GraphQL ``getPayments`` query.

    :param skip: offset indicating from which record to query (page * 50)
    :return: tuple ``(status_code, body)`` where ``body`` is the UTF-8
        decoded response text, or ``None`` when the response is empty
    """
    data = {
        "operationName": "getPayments",
        "variables": {
            "limit": 50,
            "skip": skip,
            "currency": "EUR"
        },
        "query": "query getPayments($bid: Boolean, $bidFilter: BidFilter, $releaseFilter: ReleaseFilter, $saleFilter: SaleFilter, $invertSaleFilter: Boolean, $step: [CheckoutStep!], $limit: Int, $skip: Int, $currency: Currency!) {\n payments(\n bid: $bid\n bidFilter: $bidFilter\n releaseFilter: $releaseFilter\n saleFilter: $saleFilter\n invertSaleFilter: $invertSaleFilter\n step: $step\n limit: $limit\n skip: $skip\n ) {\n ...DetailedPayment\n __typename\n }\n}\n\nfragment DetailedPayment on Payment {\n id\n step\n bid\n amount(currency: $currency) {\n ...DetailedPrice\n __typename\n }\n user {\n ...BasicUser\n __typename\n }\n sale {\n id\n by {\n ...BasicUser\n __typename\n }\n nft {\n id\n number\n release {\n id\n rarity\n tokenId\n track {\n id\n slug\n title\n minifiedThumbnailURL\n artist {\n id\n slug\n name\n avatarUrl\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n previous {\n bid\n amount(currency: $currency) {\n ...DetailedPrice\n __typename\n }\n __typename\n }\n createdAt\n __typename\n}\n\nfragment DetailedPrice on CurrencyPrice {\n symbol\n currency\n amount\n __typename\n}\n\nfragment BasicUser on User {\n id\n slug\n username\n avatarUrl\n artist {\n id\n slug\n name\n avatarUrl\n __typename\n }\n __typename\n}\n"
    }
    url = "https://pianity.com/api/graphql"
    # NOTE(review): the hard-coded "content-length: 1686" header was removed —
    # requests computes the real Content-Length itself, and a stale value can
    # corrupt or invalidate the request.
    headers = {
        "accept": "*/*",
        # "accept-encoding": "deflate",
        "accept-language": "en-US,en;q=0.9",
        "authorization": "undefined",
        "content-type": "application/json",
        "cookie": "pianity-user-currency=EUR; _scid=4c9d6e23-315b-4cd2-82b5-74d777ebb236; ajs_anonymous_id=31ef9070-3ed2-45ac-91ba-5541cc9a58c9; amplitude_idundefinedpianity.com=eyJvcHRPdXQiOmZhbHNlLCJzZXNzaW9uSWQiOm51bGwsImxhc3RFdmVudFRpbWUiOm51bGwsImV2ZW50SWQiOjAsImlkZW50aWZ5SWQiOjAsInNlcXVlbmNlTnVtYmVyIjowfQ==; crisp-client%2Fsession%2F9d472850-50f2-4825-8846-0b2fc939352f=session_4e7e9e38-c259-4f08-a3e6-28fd5a86bb93; __stripe_mid=036d6d17-4f46-4231-9af0-c19c53b999e7924a18; __stripe_sid=c4ddf676-7e78-478f-aeb0-0e30801506b0c21c42; ph_8penyn2T_ZFD6cO57_G9DWH-tdSq6CiG0ljunwqMtCw_posthog=%7B%22distinct_id%22%3A%2217f20a5bc19230-0ab4dc515aa1b8-f791b31-144000-17f20a5bc1a356%22%2C%22%24device_id%22%3A%2217f20a5bc19230-0ab4dc515aa1b8-f791b31-144000-17f20a5bc1a356%22%2C%22%24initial_referrer%22%3A%22%24direct%22%2C%22%24initial_referring_domain%22%3A%22%24direct%22%2C%22%24referrer%22%3A%22%24direct%22%2C%22%24referring_domain%22%3A%22%24direct%22%2C%22%24sesid%22%3A%5B1645520221264%2C%2217f20a5bc2025f-01cb4548a43ea4-f791b31-144000-17f20a5bc219a0%22%5D%2C%22%24session_recording_enabled%22%3Afalse%2C%22%24active_feature_flags%22%3A%5B%5D%2C%22%24enabled_feature_flags%22%3A%7B%7D%7D; amplitude_id_a37e68c66e90bca93d956222add910b5pianity.com=eyJkZXZpY2VJZCI6IjNmODJkMDc4LTdlMDMtNDY1OC04MDM4LWU3ZGE4NTU3NTI1MlIiLCJ1c2VySWQiOm51bGwsIm9wdE91dCI6ZmFsc2UsInNlc3Npb25JZCI6MTY0NTUyMDIxMTAzNywibGFzdEV2ZW50VGltZSI6MTY0NTUyMDIyMTM2NiwiZXZlbnRJZCI6NCwiaWRlbnRpZnlJZCI6MCwic2VxdWVuY2VOdW1iZXIiOjR9",
        "origin": "https://pianity.com",
        "referer": "https://pianity.com/leaderboards",
        "sec-ch-ua": "\" Not;A Brand\";v=\"99\", \"Google Chrome\";v=\"97\", \"Chromium\";v=\"97\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
    }
    # json= lets requests serialize the payload and set Content-Length;
    # timeout keeps the crawler from hanging forever on a stalled connection
    resp = requests.post(url, json=data, headers=headers, timeout=30)
    content = resp.content.decode("utf-8") if resp.content else None
    return resp.status_code, content
def data_from_json(path):
    """Parse one downloaded JSON page into a list of flat transaction dicts.

    Bid payments are skipped (``p["bid"]`` truthy); only completed direct
    sales are kept.

    :param path: path to a JSON file produced by ``get_data_to_file``
    :return: list of dicts keyed by the CSV column names
    """
    # use a context manager so the file is closed deterministically
    # (the original opened it and never closed it)
    with open(path, encoding="utf8") as f:
        data = json.load(f)
    data_dict_list = []
    for p in data["data"]["payments"]:
        if p["bid"]:
            continue  # auction bid, not a finished transaction
        nft = p["sale"]["nft"]
        track = nft["release"]["track"]
        data_dict = {
            "transaction-id": p["id"],
            "transaction-volume": p["amount"]["amount"],
            "consumer-id": p["user"]["id"],
            "consumer-name": p["user"]["username"],
            "track-id": track["id"],
            "track-name": track["title"],
            "artist-id": track["artist"]["id"],
            "artist-name": track["artist"]["name"],
            "nft-id": nft["id"],
            "nft-number": str(nft["number"]),
            "nft-rarity": nft["release"]["rarity"],
            # createdAt is epoch milliseconds; render as local time
            "time": datetime.fromtimestamp(int(p["createdAt"]) / 1000).strftime('%Y-%m-%d %H:%M:%S'),
        }
        print(data_dict)
        data_dict_list.append(data_dict)
    return data_dict_list
def write_data_to_excel(dir_path):
    """Aggregate every JSON page in *dir_path* into ``all-trx-data.csv``.

    The CSV stays in append mode so repeated runs accumulate rows, but the
    header row is now written only when the file is new or empty — the
    original re-wrote the header on every run, producing duplicate header
    lines inside the data.

    :param dir_path: directory containing the JSON pages to parse
    """
    header = ["transaction-id", "transaction-volume", "consumer-id",
              "consumer-name", "track-id", "track-name", "artist-id",
              "artist-name", "nft-id", "nft-number", "nft-rarity", "time"]
    out_path = 'all-trx-data.csv'
    need_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
    # context manager guarantees the CSV is flushed and closed on any exit path
    with open(out_path, 'a', newline='', encoding="utf8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        if need_header:
            writer.writeheader()
        for fname in os.listdir(dir_path):
            file_path = os.path.join(dir_path, fname)
            print("parsing file:" + file_path)
            writer.writerows(data_from_json(file_path))
if __name__ == "__main__":
    data_dir_path = "pianity"
    # Step 1 (one-off download) is commented out; re-enable to refresh data.
    #get_data_to_file(data_dir_path)
    # Step 2: flatten the downloaded JSON pages into a single CSV.
    write_data_to_excel(data_dir_path)