28

SQL, "--", . , , . SQL.

:


SQL?

SQL (Structured Query Language) . - , , SQL. , SQL - .

, . ( MySQL, PostgreSQL, Microsoft SQL Server), SQL, , SQL, .

, Python, , , SQL . Indeed 80 000 SQL.

SQL! Hubway, 1,5 , .

   Hubway

, , , SQL.

, :


SQL:

- , .

, , . , . Excel , , : , , , , , .

 Excel

, , , , , , .

, , . . , , .

, , . : . "" , , , . "" , , , .

, , .


. , trips ( "") stations ( ""). trips. :

  • id - , .
  • duration - , ;
  • start_date - ;
  • start_station - , id stations , ;
  • end_date - ;
  • end_station - , ;
  • bike_number - Hubway , ;
  • sub_type - . "" , "" ;
  • zip_code - ( );
  • birth_date - ( );
  • gender - ( );

, , SQL, :

  • ?
  • "" ?
  • ?
  • : ?
  • ?
  • 30 ?

SQL, :

  • SELECT
  • WHERE
  • LIMIT
  • ORDER BY
  • GROUP BY
  • AND
  • OR
  • MIN
  • MAX
  • AVG
  • SUM
  • COUNT

SQLite3. SQLite Python 2.5, , Python, SQLite. Python SQLite3 Anaconda, .

Python SQL Pandas, . , , , .

Python, SQLite3 . " " SQLite3 :

~$ sqlite hubway.db SQLite version 3.14.0 2016-07-26 15:17:14 Enter ".help" for usage hints.sqlite>	

, , , .

SQLite Python. Jupyter .

, ( ) :

import sqlite3
import pandas as pd
db = sqlite3.connect('hubway.db')
def run_query(query):
    return pd.read_sql_query(query,db)

SELECT

, - SELECT. SELECT , - , . ( ) * .

, , , . FROM, . , start_date bike_number trips, :

SELECT start_date, bike_number FROM trips;

SELECT, , . , start_date bike_number. , , , , trips FROM.

, SQL- (;). SQL , , .


LIMIT

, Hubway, - LIMIT. LIMIT , .

SELECT, , trips, , . start_date bike_number , LIMIT :

SELECT start_date, bike_number FROM trips LIMIT 5;

LIMIT, , , . 5, , , .

LIMIT Hubway - 1,5 , , , .

Hubway. "", , , . :

query = 'SELECT * FROM trips LIMIT 5;'
run_query(query)
LIMIT

* . , SELECT trips. LIMIT, .

, . , , . , . , .

. , :

query = 'SELECT duration, start_date FROM trips LIMIT 5'
run_query(query)
SELECT duration, start_date FROM trips LIMIT 5

ORDER BY

, , , - ORDER BY. .

, , . , ORDER BY . , ASC DESC - .

, , :

ORDER BY duration ASC

SELECT, LIMIT ORDER BY : ?

, , .

duration . , , , . , , , :

  • SELECT duration FROM trips.
  • ORDER BY duration DESC .
  • LIMIT, 1

, .

, - , , . , , . ( ), . Python , .

, .

query = '''
SELECT duration FROM trips
ORDER BY duration DESC
LIMIT 1;
'''
run_query(query)
ORDER BY

, 9999 , 166 . 9999, , , .

, . 9999 . , , , , LIMIT , 10 :

query = '''
SELECT durationFROM trips
ORDER BY duration DESC
LIMIT 10
'''
run_query(query)
LIMIT 10

, 9999, , . , .

Hubway 30 (-, 9999 , 25 ), , 4- .


WHERE

, , , ? WHERE. WHERE , . , , , ID B00400:

WHERE bike_number = "B00400"

, , . , bike_number . , .

, WHERE 9990 :

query = '''
SELECT * FROM trips
WHERE duration > 9990;
'''
run_query(query)
WHERE

, 14 , 9990 . -, , , sub_type "". , , "" . , , .

, SQL - .

WHERE, WHERE, AND OR. , , 9990 , sub_type Registered, AND .

: , , . , .

. , , , :

query = '''
SELECT * FROM trips
WHERE (duration >= 9990) AND (sub_type = "Registered")
ORDER BY duration DESC;
'''
run_query(query)
WHERE (duration >= 9990)

, , - " "" ?" , , , WHERE, , sub_type "Registered", .

SQL COUNT , .

COUNT . , COUNT (column_name) ( ) , , :

SELECT COUNT(id)
FROM trips

, , . ( "") . , null, COUNT id - id null, , .

COUNT (1) COUNT (*) . , COUNT NULL. , , .

, . SELECT COUNT (*) WHERE sub_type = "Registered", , , .

query = '''
SELECT COUNT(*)FROM trips
WHERE sub_type = "Registered";
'''
run_query(query)
COUNT

. . - , , . , AS, . ,

query = '''
SELECT COUNT(*) AS "Total Trips by Registered Users"
FROM trips
WHERE sub_type = "Registered";
'''
run_query(query)
SELECT COUNT(*)

COUNT , SQL. SUM, AVG, MIN MAX , , . COUNT .

, " ?", AVG duration (, , AS, ):

query = '''
SELECT AVG(duration) AS "Average Duration"
FROM trips;
'''
run_query(query)
AVG(duration)

, 912 , 15 . , , Hubway 30 . , , .

, ? - SELECT AVG (duration) FROM trips WHERE, "" "" .

-. SQL GROUP BY.


GROUP BY

GROUP BY .

, , gender. "", "" "" (; ).

GROUP BY, , , . , , , - .

, . , COUNT, .

, , , : .

  • , SELECT, , . sub_type AVG (duration).
  • GROUP BY sub_type, .

, :

query = '''
SELECT sub_type, AVG(duration) AS "Average Duration"
FROM trips
GROUP BY sub_type;
'''
run_query(query)
SELECT sub_type, AVG(duration)

! 11 , 25 . , , , , . , , .

, , , (, ), , , . , , ,

. : " ?" . , , - , , :

query = '''
SELECT bike_number as "Bike Number", COUNT(*) AS "Number of Trips"
FROM trips
GROUP BY bike_number
ORDER BY COUNT(*) DESC
LIMIT 1;
'''
run_query(query)
 B00490

, B00490 . , :

  • - SELECT, , bike_number . AS .
  • FROM, , , , .
  • . GROUP BY, COUNT bike_number .
  • ORDER BY, , .
  • , LIMIT, , , , , - , .

. 30 .

, 30-, . - . SQL +, -, * / .

query = '''
SELECT AVG(duration) FROM trips
WHERE (2017 - birth_date) > 30;
'''
run_query(query)

JOIN

, . , SQL , , .

, . stations Hubway id, trips.

, . , , customer_id.

, , order_number name . , :

SELECT order_number, name
FROM orders;

, order_number name , . , , :

  • order_number?
  • name?
  • ?

, SELECT. - , ".". , SELECT order_number SELECT orders.order_number, customers.name. , , , .

, , JOIN ON. JOIN , , ON , .

, , , , ON, . JOIN , FROM. , FROM orders INNER JOIN customers, FROM customers INNER JOIN orders.

, customer_id . ON, , :

ON orders.customer_ID = customers.customer_id

"." , . , , , :

SELECT orders.order_number, customers.name
FROM orders
INNER JOIN customers
ON orders.customer_id = customers.customer_id

, .

Hubway, , JOIN .

, . , 5 , , :

query = '''
SELECT * FROM stations
LIMIT 5;
'''
run_query(query)
JOIN
  • id - ( start_station end_station );
  • station - ;
  • municipality - , (, , );
  • lat - ;
  • lng - ;

, , :

  • ?
  • ?

, , : " ?" :

  • SELECT station COUNT .
  • , JOIN, ON start_station id .
  • - (GROUP BY station), COUNT
  • , COUNT ORDER BY COUNT LIMIT .
query = '''
SELECT stations.station AS "Station", COUNT(*) AS "Count"
FROM trips INNER JOIN stations
ON trips.start_station = stations.id
GROUP BY stations.station 
ORDER BY COUNT(*) DESC
LIMIT 5;
'''
run_query(query)

, , . , - , .

, , - ? , . , WHERE, COUNT , start_station end_station.

query = '''
SELECT stations.station AS "Station", COUNT(*) AS "Count"
FROM trips INNER JOIN stations
ON trips.start_station = stations.id
WHERE trips.start_station = trips.end_station
GROUP BY stations.station
ORDER BY COUNT(*) DESC
LIMIT 5;
'''
run_query(query)

, , . , , Hubway, A B, , , .

- , , . ? , . , , :

: ? . , . , . start_station, end_station.

, , start_station, , end_station. , , , AS.

, "start", . "start" "." , JOIN ( JOIN, end_station ):

INNER JOIN stations AS start ON trips.start_station = start.id

. , " " " ", != .

query =
'''
SELECT COUNT(trips.id) AS "Count"
FROM trips INNER JOIN stations AS start
ON trips.start_station = start.id
INNER JOIN stations AS end
ON trips.end_station = end.id
WHERE start.municipality <> end.municipality;
'''
run_query(query)
 300 000  1,5   ( 20%)

, 300 000 1,5 ( 20%) , - , Hubway , .

, ! SQL. , . , SQL.


?

SQL , , . Hubway, , . , :

  • ( 30 )?
  • ?
  • , , ?
  • ?

>