In
[ ]: queries = ["" for i in range(0, 25)]
### 0. List all the users who have at least 1000 UpVotes.
### Output columns and order: Id, Reputation, CreationDate, DisplayName
### Order by Id ascending
queries[0] = """
select Id, Reputation, CreationDate, DisplayName
from users
where UpVotes >= 1000
order by Id asc;
"""
### 1. List the posts (Id, Title, Tags) for all posts that are tagged 'postgres
### Hint: use ``like'' -- note that tags are enclosed in '<>' in the Tags field
### Output column order: Id, Title, Tags
### Order by Id ascending
queries[1] = """
select Id, Title, Tags
from posts
where Tags like '%<postgresql-9.4%>'
order by Id asc;
"""
### 2. Write a query to output the number of years users have been on the
### platform (assuming they started on 'CreationDate') as of September 1, 2022
### Use 'age' function that operates on dates (https://www.postgresql.org/docs/
### Restrict output to Users with DisplayName = 'Jason'
### Output columns: Id, DisplayName, Num_years
### Order output by Num_years increasing, and then by Id ascending
queries[2] = """
select Id, DisplayName, extract(year from age(timestamp '2022-09-01', CreationD
from users
where DisplayName = 'Jason'
order by Num_years, Id asc;
"""
### 3. Select all the "distinct" years that users with names starting with 'M'
### joined the platform (i.e., created their accounts).
### Output column: Year
### Order output by Year ascending
queries[3] = """
select DISTINCT each_year from
(select extract(year from CreationDate) as each_year
from users
where DisplayName like 'M%')
as Years
order by each_year asc;
"""
### 4. Write a query to find users who have, on average, given at least 1 UpVot
### day they have been on the platform as of September 1, 2022
### Hint: Use subtraction on "date" to get number of days between two dates.
### Count only full days (i.e., someone who joined 1.5 days ago only needs to
### have provided 1 UpVote to make it into the result)
### Output columns: Id, DisplayName, CreationDate, UpVotes
### Order by Id ascending
queries[4] = """
with temp as
SELECT ('2022-09-01' - CreationDate) as Num_Days, Id, upvotes, CreationDate
from users
select id, DisplayName, CreationDate, upvotes
from temp
where Num_Days <= upvotes
order by Id asc
"""
### 5. Write a single query to report all Badges for the users with reputation
### Output Column: Id (from Users), DisplayName, Name (from Badges), Reputation
### Order by: Id increasing
queries[5] = """
SELECT users.id, DisplayName, badges.name, reputation
from users INNER JOIN badges ON badges.userid = users.id
where reputation BETWEEN 10000 AND 11000
order by Id asc;
"""
### 6. Write a query to find all Posts who satisfy one of the following conditi
### - the post title contains 'postgres' and the number of views is at l
### - the post title contains 'mongodb' and the number of views is at le
### The match should be case insensitive
### Output columns: Id, Title, ViewCount
### Order by: Id ascending
queries[6] = """
select Id, Title, ViewCount
from posts
where (lower(title) LIKE '%postgres%' AND viewcount >= 50000) OR (lower(title)
order by Id asc;
"""
### 7. Count the number of the Comments made by the user with DisplayName 'JHFB
### Output columns: Num_Comments
queries[7] = """
select count(*) as Num_Comments
from
(select users.id
from users INNER JOIN comments ON users.id = comments.userid
where users.displayname = 'JHFB') as atable;
"""
### 8. Find the Users who have received badges with names: "Guru" and "Curious"
### Only report a user once even if they have received multiple badges with the
### Hint: Use Intersect.
### Output columns: UserId
### Order by: UserId ascending
queries[8] = """
select DISTINCT UserId from
(SELECT userid
FROM badges
WHERE name = 'Guru'
INTERSECT
SELECT userid
FROM badges
WHERE name = 'Curious') as received
order by UserId asc;
"""
### 9. "Tags" field in Posts lists out the tags associated with the post in the
### Find the Posts with at least 6 tags, with one of the tags being postgresql
### Hint: use "string_to_array" and "cardinality" functions.
### Output columns: Id, Title, Tags
### Order by: Id ascending
queries[9] = """
select Id, Title, Tags
from posts
where (Tags like '%<postgresql>%') AND (LENGTH(Tags) - LENGTH(REPLACE(Tags,'<',
order by Id asc;
"""
### 10. SQL "with" clause can be used to simplify queries. It essentially allow
### specifying temporary tables to be used during the rest of the query. See Se
### 3.8.6 (6th Edition) for some examples.
###
### Write a query to find the name(s) of the user(s) with the largest number of
### We have provided a part of the query to build a temporary table.
###
### Output columns: Id, DisplayName, Num_Comments
### Order by Id ascending (there may be more than one answer)
queries[10] = """
with temp as (
select Users.Id as Id, DisplayName, count(*) as num_Comments
from users, comments
where users.id = comments.userid
group by users.id, users.displayname
order by num_Comments desc)
select * from temp where num_Comments = (select num_Comments from temp LIMIT 1)
order by Id asc;
"""
### 11. List the users who posted no comments and with at least 500 views.
### Hint: Use "not in".
### Output Columns: Id, DisplayName
### Order by Id ascending
# Use temp to get a list of user ids, call them temp_id, that have made comment
# select id and display name for users where the userid is not in temp_id, mean
queries[11] = """
with temp as (
select users.Id as temp_id
from users, comments
where users.id = comments.userid
group by users.id, users.displayname
select Id, DisplayName
from Users
where (views >= 500) AND (Id NOT IN (select DISTINCT temp_id from temp))
order by Id asc;
"""
### 12. Write a query to output a list of posts with comments, such that PostTy
### and the comment has score of at least 10. So there may be multiple rows wit
### in the output.
### Output: Id (Posts), Title, Text (Comments)
### Order by: Id ascending
# want posttypeid = 6
queries[12] = """
with temp as (
select posts.id as temp_id, posts.title as temp_title, comments.text as tem
from posts, comments
where (posts.id = comments.postid) AND (comments.score >= 10)
select temp_id as Id, temp_title as Title, temp_comments as Text
from temp
where temp_pti = 6
order by Id asc;
"""
### 13. Generate a list - (Badge_Name, Num_Users) - containing the different
### badges, and the number of users who received those badges.
### Note: A user may receive the same badge multiple times -- they should only
### Output columns: Badge_name, Num_users
### Order by Badge_name asc
### Use LIMIT to limit the output to first 20 rows.
queries[13] = """
with unique_badges as (
select DISTINCT name as names, userid as userids from badges order by names
),
distinct_names as (select DISTINCT names as distinct_name from unique_badges or
select distinct_name as Badge_name, (select count(names) as Num_users from uniq
from distinct_names
order by Badge_name asc
LIMIT 20;
"""
### 14. For each post, count the number of comments for that post.
###
### One way to do this is "Scalar" subqueries in the select clause.
### select Id,
### (select count(*) from comments where comments.postid = posts.id) as Num_com
### from posts order by posts.id;
###
### However, this takes too long, even on the relatively small database we
### have.
###
### Instead, use "left outer join" to do this task.
###
### Output Columns: Id, Num_Comments
### Order by: Id ascending
queries[14] = """
select posts.id as Id, count(comments.postid) as Num_Comments
from posts left join comments on posts.id = comments.postid
GROUP BY posts.id
order by Id asc;
"""
### 15. Generate a list - (Reputation, Num_Users) - containing the number
### of users with reputation between 1 and 100 (inclusive). If a particular rep
### score does not have any users (e.g., 2), then that reputation should appear
### 0 count.
###
### HINT: Use "generate_series()" to create an inline table -- try
### "select * from generate_series(1, 10) as g(n);" to see how it works.
### This is what's called a "set returning function", and the result can be use
### See: https://www.postgresql.org/docs/12/functions-srf.html
###
### Output columns: Reputation, Num_users
### Order by Reputation ascending
queries[15] = """
with temp as
(SELECT Reputation, id
FROM users
WHERE Reputation BETWEEN 1 AND 100
ORDER BY Reputation),
temp2 as
(SELECT Reputation, count(id) as Num_Users
FROM temp
GROUP BY Reputation
ORDER BY Reputation asc),
indices as
(Select Generate_series (1,100) as mySeries),
temp3 as
(SELECT indices.mySeries as Reputation, temp2.Num_Users
FROM indices
LEFT JOIN temp2
ON indices.mySeries = temp2.Reputation
ORDER BY Reputation ASC)
SELECT Reputation,
CASE
WHEN Num_Users IS NOT NULL THEN Num_Users
WHEN Num_Users IS NULL THEN 0
END AS Num_Users
FROM temp3
"""
### 16. Generalizing #14 above, associate posts with both the number of
### comments and the number of votes
###
### As above, using scalar subqueries won't scale to the number of tuples.
### Instead use WITH and Left Outer Joins.
###
### Output Columns: Id, Num_Comments, Num_Votes
### Order by: Id ascending
queries[16] = """
with temp as
(select posts.id as Id, count(comments.postid) as Num_Comments
from posts left join comments on posts.id = comments.postid
GROUP BY posts.id
order by Id asc),
temp2 as (select posts.id as Id2, count(votes.postid) as Num_Votes
from posts left join votes on posts.id = votes.postid
GROUP BY posts.id
order by Id2 asc)
SELECT temp.Id as Id, temp.Num_Comments as Num_Comments, temp2.Num_Votes as Num
FROM temp inner join temp2
on temp.Id = temp2.Id2
order by Id asc;
"""
### 17. Write a query to find the posts with at least 7 children (i.e., at
### least 7 other posts with that post as the parent
###
### Output columns: Id, Title
### Order by: Id ascending
queries[17] = """
with parents as
(select Id, Title
from posts
where posttypeid = 1
),
children as
(select Id, parentId
from posts
where posttypeid = 2),
families as
(select parents.Id as Id, count(children.Id) as Num_Children, parents.Title
from parents left join children on parents.Id = children.parentId
GROUP BY parents.Id, parents.Title
order by Id asc
select Id, Title
from families
where Num_Children >=7;
"""
### 18. Find posts such that, between the post and its children (i.e., answers
### to that post), there are a total of 100 or more votes
###
### HINT: Use "union all" to create an appropriate temp table using WITH
###
### Output columns: Id, Title
### Order by: Id ascending
# temp is a table with a list of ids of posts that are answers and questions i.
queries[18] = """
with temp as
(select Id, Id as parentID, Title from posts WHERE posttypeid = 1
UNION ALL
select ID, parentId, Title from posts Where posttypeid = 2
),
temp2 as
(select temp.ParentId as ParentId, temp.Title as Title, votes.id as votes_i
FROM temp LEFT JOIN votes ON votes.postid = temp.Id
),
temp3 as
(select ParentID as Id, count(votes_id) as Num_Votes
from temp2 GROUP BY Id
order by Id asc
),
temp4 as
(select Id
from temp3 where Num_Votes >=100
SELECT DISTINCT temp4.Id, posts.title
FROM temp4
INNER JOIN posts
ON temp4.Id = posts.id;
"""
### 19. Write a query to find posts where the post and the accepted answer
### are both owned by the same user (i.e., have the same "OwnerUserId") and the
### user has not made any other post (outside of those two).
###
### Hint: Use "not exists" for the last one.
###
### Output columns: Id, Title
### Order by: Id Ascending
queries[19] = """
with temp as
(SELECT users.id as UserId, posts.id as PostId, posts.posttypeid as posttyp
FROM posts
LEFT JOIN users
ON users.id = posts.owneruserid
WHERE posts.posttypeid = 1),
tempfreq as
(SELECT users.id as UserId, posts.id as PostId, posts.posttypeid as posttyp
FROM posts
LEFT JOIN users
ON users.id = posts.owneruserid),
UsersPosts as
(select UserId, count(PostId) as Frequency
from tempfreq
GROUP BY UserId
order by Frequency),
temp2 as
(SELECT users.id as UserId, posts.id as PostId, posts.posttypeid as posttyp
FROM posts
LEFT JOIN users
ON users.id = posts.owneruserid
WHERE posts.posttypeid = 2),
temp3 as
SELECT temp.userid as Id, temp.Title as Title, temp.answerid as answerId, t
FROM temp
INNER JOIN temp2
ON temp.answerid = temp2.postid
ORDER BY Id asc
),
temp4 as
SELECT temp3.Id as Id, temp3.Title as Title, temp3.answerid as answerId, te
FROM UsersPosts
INNER JOIN temp3
ON UsersPosts.UserId = temp3.Id
WHERE UsersPosts.Frequency =2
select answererid as Id, Title from temp4
where Id = answererId
Order by Id asc
"""