Skip to content

Commit

Permalink
really first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvainrocheleau committed Feb 8, 2016
0 parents commit 5db386b
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 0 deletions.
2 changes: 2 additions & 0 deletions OCI-IFO/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Some stuff
# OCI-IFO
14 changes: 14 additions & 0 deletions OCI-IFO/data_samples/article.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_id": ObjectId("56b782be1d41c83ce0c7010b"),
"author": "Jon Azpiri",
"title": "2-year-old girl in Langley dies in ‘tragic accident’",
"full_article": "<p>A two-year-old girl who was found in a Langley pond has died.</p><p>Neighbours, RCMP and search and rescue crews scoured an area around Robertson Crescent early Saturday evening for signs of the girl after she was reported missing. Shortly after 7 p.m., they discovered the girl in a pond they had drained.</p><p>She was rushed to hospital in grave condition. RCMP said she has since passed away.</p><p>Police described the incident as a “tragic accident” and said there were no signs of foul play.</p><p>No names have been released.</p><p class=\"story-ad align-middle\"></p>",
"in_drupal": false,
"language": "en",
"media_url": "http://globalnews.ca/",
"original_article_url": "http://globalnews.ca/news/2502837/2-year-old-girl-in-langley-dies-in-tragic-accident/",
"published_date": "Sun, 07 Feb 2016 17:35:42 +0000",
"redirected_article_url": "http://globalnews.ca/news/2502837/2-year-old-girl-in-langley-dies-in-tragic-accident/",
"rss_url": "http://globalnews.ca/bc/feed/",
"scraped_on": NumberInt(1454867134)
}
13 changes: 13 additions & 0 deletions OCI-IFO/data_samples/media.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"_id": ObjectId("56993b2d1d41c8155edcc182"),
"original_website_url": "http://www.nationalpost.com",
"organisation_type": "media",
"wiki_url": "http://en.wikipedia.org/wiki/National_Post",
"dbpedia_url": "http://dbpedia.org/resource/National_Post",
"organisation": "National Post",
"avgcirculation": "92212.666666666666667",
"redirected_website_url": "http://www.nationalpost.com/index.html",
"scraped_on": NumberInt(1452882733),
"dbpedia_owner": "http://dbpedia.org/resource/Postmedia_Network",
"owner": "Postmedia Network Canada Corporation"
}
8 changes: 8 additions & 0 deletions OCI-IFO/data_samples/rss_feed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"_id": ObjectId("56aa81491d41c866f058a0dc"),
"rss_url": "http://www.chroniclejournal.com/search/?c%5B%5D=news%2Flocal%2Cnews%2Flocal%2F%2A&d=&d1=&d2=&f=rss&l=100&nsa=eedition&q=&s=start_time&sd=desc&t=article",
"language": "en",
"valid_rss": NumberInt(1),
"scraped_on": NumberInt(1454014793),
"media_url": "http://www.chroniclejournal.com/"
}
33 changes: 33 additions & 0 deletions OCI-IFO/php/articles_by_keywords.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php
// Prints, as JSON, up to 1000 documents of the "articles" collection whose
// "full_article" field contains a given keyword (case-insensitive).

try {
    // Connection to MongoDB on the IFO server.
    // NOTE(review): credentials are embedded in the URI; presumably a shared
    // read-only account - confirm before reusing this snippet elsewhere.
    $conn = new Mongo('mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR');
    $db = $conn->scrapy;
    $collection = $db->articles;

    // Query to MongoDB.
    // This query looks for a particular keyword in articles. "TransCanada"
    // may be replaced by any keyword.
    $keyword = 'TransCanada';

    // The pattern is built from single-quoted pieces on purpose: inside a
    // double-quoted string PHP would expand "$TransCanada" as an (undefined)
    // variable, leaving the pattern "/^/i", which matches every article.
    // There is no "^" anchor, so the keyword is found anywhere in the text;
    // preg_quote() keeps keywords containing regex metacharacters literal.
    $query = array(
        'full_article' => array(
            '$regex' => new MongoRegex('/' . preg_quote($keyword, '/') . '/i')
        )
    );

    // Collect each result as a one-entry array keyed by the cursor key,
    // which is the output shape the script has always produced.
    $json = array();
    $articles = $collection->find($query)->limit(1000);
    foreach ($articles as $obj => $value) {
        $json[] = array($obj => $value);
    }
    echo json_encode($json);

    // Disconnect from the server.
    $conn->close();
} catch (MongoConnectionException $e) {
    die('Error connecting to MongoDB server');
} catch (MongoException $e) {
    die('Error: ' . $e->getMessage());
}
?>
19 changes: 19 additions & 0 deletions OCI-IFO/python/articles_by_author.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

# Connection to MongoDB on the IFO server.
# NOTE(review): credentials are embedded in the URI; presumably a shared
# read-only account - confirm before reusing this snippet elsewhere.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
try:
    db = client.scrapy
    collection = db.articles

    # Uncomment to save output to a file.
    # f = open('data.txt', 'w')

    # Query to MongoDB.
    # This query outputs a list of articles written by an author
    # (at most 1000 documents).
    articles = collection.find({"author": "Ryan White"}).limit(1000)
    for x in articles:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare "print x" statement is Python 2 only.
        print(x)
        # Uncomment to save output to the file.
        # f.write(str(x) + "\n")

    # Uncomment together with the open() call above.
    # f.close()
finally:
    # Release the connection even if the query or the printing fails.
    client.close()

19 changes: 19 additions & 0 deletions OCI-IFO/python/articles_by_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

# Connection to MongoDB on the IFO server.
# NOTE(review): credentials are embedded in the URI; presumably a shared
# read-only account - confirm before reusing this snippet elsewhere.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
try:
    db = client.scrapy
    collection = db.articles

    # Uncomment to save output to a file.
    # f = open('data.txt', 'w')

    # Query to MongoDB.
    # This query looks for a particular keyword in articles (at most 1000
    # documents). TransCanada may be replaced by anything; the value is used
    # as a regular expression and the match is case-sensitive.
    articles = collection.find({"full_article": {'$regex': 'TransCanada'}}).limit(1000)
    for x in articles:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare "print x" statement is Python 2 only.
        print(x)
        # Uncomment to save output to the file.
        # f.write(str(x) + "\n")

    # Uncomment together with the open() call above.
    # f.close()
finally:
    # Release the connection even if the query or the printing fails.
    client.close()

19 changes: 19 additions & 0 deletions OCI-IFO/python/articles_by_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

# Connection to MongoDB on the IFO server.
# NOTE(review): credentials are embedded in the URI; presumably a shared
# read-only account - confirm before reusing this snippet elsewhere.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
try:
    db = client.scrapy
    collection = db.articles

    # Uncomment to save output to a file.
    # f = open('data.txt', 'w')

    # Query to MongoDB.
    # This query outputs a list of articles published by a media outlet
    # (at most 1000 documents); the outlet is selected by its exact media_url.
    articles = collection.find({"media_url": "http://www.thestar.com/"}).limit(1000)
    for x in articles:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare "print x" statement is Python 2 only.
        print(x)
        # Uncomment to save output to the file.
        # f.write(str(x) + "\n")

    # Uncomment together with the open() call above.
    # f.close()
finally:
    # Release the connection even if the query or the printing fails.
    client.close()

19 changes: 19 additions & 0 deletions OCI-IFO/python/media_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

# Connection to MongoDB on the IFO server.
# NOTE(review): credentials are embedded in the URI; presumably a shared
# read-only account - confirm before reusing this snippet elsewhere.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
try:
    db = client.scrapy
    collection = db.websites

    # Uncomment to save output to a file.
    # f = open('data.txt', 'w')

    # Query to MongoDB.
    # This query outputs a list of the media in the database (at most 1000
    # documents): every "websites" document that has an original_website_url.
    media = collection.find({"original_website_url": {"$exists": 1}}).limit(1000)
    for x in media:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare "print x" statement is Python 2 only.
        print(x)
        # Uncomment to save output to the file.
        # f.write(str(x) + "\n")

    # Uncomment together with the open() call above.
    # f.close()
finally:
    # Release the connection even if the query or the printing fails.
    client.close()

19 changes: 19 additions & 0 deletions OCI-IFO/python/media_newspapers_by_circulation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

# Connection to MongoDB on the IFO server.
# NOTE(review): credentials are embedded in the URI; presumably a shared
# read-only account - confirm before reusing this snippet elsewhere.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
try:
    db = client.scrapy
    collection = db.websites

    # Uncomment to save output to a file.
    # f = open('data.txt', 'w')

    # Query to MongoDB.
    # This query outputs a list of newspapers and their circulation (at most
    # 1000 documents): every "websites" document whose avgcirculation is not
    # null.
    media = collection.find({"avgcirculation": {"$ne": None}}).limit(1000)
    for x in media:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare "print x" statement is Python 2 only.
        print(x)
        # Uncomment to save output to the file.
        # f.write(str(x) + "\n")

    # Uncomment together with the open() call above.
    # f.close()
finally:
    # Release the connection even if the query or the printing fails.
    client.close()

0 comments on commit 5db386b

Please sign in to comment.