1.安装
2.数据库的操作
安装依赖包:
pip install cassandra-driver
实例:
# coding:utf-8
from cassandra import ConsistencyLevel
# 引入数据
from cassandra.cluster import Cluster
# 引入DCAwareRoundRobinPolicy模块,可用来自定义驱动程序的行为
# from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import pandas as pd
cluster = Cluster(contact_points=[‘127.0.0.1’],
port=9042)
session = cluster.connect()
# 创建keyspace和table
def createKeySpaceAndTable():
# 创建KeySpace;使用第一个副本放置策略,即简单策略;选择复制因子为3个副本。
session.execute(“CREATE KEYSPACE test WITH replication = {‘class’:’SimpleStrategy’, ‘replication_factor’ : 1};”)
# 选择keyspace
session.execute(‘use test;’)
# 创建table
session.execute(‘create table test.user(name text primary key, age int, email varchar);’)
# 删除table
# session.execute(‘drop table test.user;’)
# 关闭Session
session.shutdown()
# 关闭Cluster
cluster.shutdown()
”’查询keyspaces/tables/columns状态”’
def selectAllStatus():
# 查询keyspaces/tables/columns状态
print(cluster.metadata.keyspaces)
print(‘———-‘)
print(cluster.metadata.keyspaces[‘test’].tables)
print(‘———-‘)
print(cluster.metadata.keyspaces[‘test’].tables[‘user’])
print(‘———-‘)
print(cluster.metadata.keyspaces[‘test’].tables[‘user’].columns)
print(‘———-‘)
print(cluster.metadata.keyspaces[‘test’].tables[‘user’].columns[‘age’])
print(‘———-‘)
# 关闭连接
cluster.shutdown()
# 查看是否关闭连接
print(cluster.is_shutdown)
”’插入和查询表中的数据”’
def testInsertAndSelectTable():
# table中插入数据
session.execute(‘insert into test.user (name, age, email) values (%s, %s, %s);’, [‘aaa’, 21, ‘222@21.com’])
session.execute(‘insert into test.user (name, age, email) values (%s, %s, %s);’, [‘bbb’, 22, ‘bbb@22.com’])
session.execute(‘insert into test.user (name, age, email) values (%s, %s, %s);’, [‘ddd’, 20, ‘ccc@20.com’])
# table中查询数据
rows = session.execute(‘select * from test.user;’)
for row in rows:
print(row)
# 关闭连接
cluster.shutdown()
# 查看是否关闭连接
print(cluster.is_shutdown)
”’连接远程数据库”’
def testContectRemoteDatabase():
# 配置Cassandra集群的IP,记得改成自己的远程数据库IP哦
contact_points = [‘1.1.1.1’, ‘2.2.2.2’, ‘3.3.3.3’]
# 配置登陆Cassandra集群的账号和密码,记得改成自己知道的账号和密码
auth_provider = PlainTextAuthProvider(username=’XXX’, password=’XXX’)
# 创建一个Cassandra的cluster
cluster = Cluster(contact_points=contact_points, auth_provider=auth_provider)
# 连接并创建一个会话
session = cluster.connect()
# 定义一条cql查询语句
cql_str = ‘select * from keyspace.table limit 5;’
simple_statement = SimpleStatement(cql_str, consistency_level=ConsistencyLevel.ONE)
# 对语句的执行设置超时时间为None
execute_result = session.execute(simple_statement, timeout=None)
# 获取执行结果中的原始数据
result = execute_result._current_rows
# 把结果转成DataFrame格式
result = pd.DataFrame(result)
# 把查询结果写入csv
result.to_csv(‘连接远程数据库.csv’, mode=’a’, header=True)
# 关闭连接
cluster.shutdown()
if __name__ == ‘__main__’:
# 创建keyspace和table
#createKeySpaceAndTable()
# 查询keyspaces/tables/columns状态
#selectAllStatus()
# 插入和查询表中的数据
testInsertAndSelectTable()