-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Former-commit-id: 48891c5
- Loading branch information
lucananni93
committed
Mar 12, 2017
1 parent
a2f064b
commit a3eecfa
Showing
17 changed files
with
332 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
# Created by https://www.gitignore.io/api/pycharm,python | ||
|
||
### PyCharm ### | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm | ||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | ||
|
||
# User-specific stuff: | ||
.idea/**/workspace.xml | ||
.idea/**/tasks.xml | ||
|
||
# Sensitive or high-churn files: | ||
.idea/**/dataSources/ | ||
.idea/**/dataSources.ids | ||
.idea/**/dataSources.xml | ||
.idea/**/dataSources.local.xml | ||
.idea/**/sqlDataSources.xml | ||
.idea/**/dynamic.xml | ||
.idea/**/uiDesigner.xml | ||
|
||
# Gradle: | ||
.idea/**/gradle.xml | ||
.idea/**/libraries | ||
|
||
# Mongo Explorer plugin: | ||
.idea/**/mongoSettings.xml | ||
|
||
## File-based project format: | ||
*.iws | ||
|
||
## Plugin-specific files: | ||
|
||
# IntelliJ | ||
/out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
fabric.properties | ||
|
||
### PyCharm Patch ### | ||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 | ||
|
||
# *.iml | ||
# modules.xml | ||
# .idea/misc.xml | ||
# *.ipr | ||
|
||
### Python ### | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*,cover | ||
.hypothesis/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# dotenv | ||
.env | ||
|
||
# virtualenv | ||
.venv | ||
venv/ | ||
ENV/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# End of https://www.gitignore.io/api/pycharm,python |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,10 @@ | ||
# GMQL-Python | ||
Python-Spark implementation of the GMQL system | ||
|
||
## Requirements | ||
- A Python environment | ||
- Apache Spark | ||
|
||
## Setting up the project | ||
1. Download this repository | ||
2. In your IDE add the following paths to the project |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
""" | ||
Setting up the pyspark environment | ||
""" | ||
import os

# Location of the Spark installation. An exported SPARK_HOME takes precedence
# so the package is usable on other machines; otherwise fall back to the
# path that was originally hard-coded here.
spark_home = os.environ.get('SPARK_HOME', '/home/luca/spark-2.1.0-bin-hadoop2.7')

# findspark.init() has to run before `import pyspark`: it is what makes the
# pyspark package shipped inside `spark_home` importable.
import findspark
findspark.init(spark_home=spark_home)
import pyspark

app_name = 'gmql_spark'

# Module-level Spark context, created once at import time and exposed as
# `gmql.sc` for the rest of the package.
sc = pyspark.SparkContext(appName=app_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from gmql import sc | ||
|
||
class GMQLDataset:
    """Entry point for loading datasets through the `sc` object imported from `gmql`."""

    def load_from_path(self, path, parser):
        """Read the text file(s) at `path` and parse every line.

        :param path: location handed unchanged to `sc.textFile`
        :param parser: object exposing a `parse_line(line)` method, applied
            to each line of the input
        :return: the RDD produced by mapping `parser.parse_line` over the lines
        """
        lines = sc.textFile(path)
        return lines.map(parser.parse_line)
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from .Parser import Parser | ||
from pybedtools import BedTool | ||
|
||
|
||
class BedParser(Parser):
    """Parser for tab-separated BED files."""

    # Column names, in positional order, used as keys of the parsed rows.
    header = ['chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd',
              'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']

    def parse_line(self, line):
        """Turn one tab-separated line into a ``{column name: value}`` dict.

        Fields are paired positionally with ``header``. Values are kept as
        the raw strings produced by the split; no type conversion is done.
        A line with fewer fields than ``header`` yields only the leading
        columns. Fields beyond the last known column are ignored instead of
        raising ``IndexError``.

        :param line: a single line of the BED file
        :return: dict mapping column names to the string values of the line
        """
        return dict(zip(self.header, line.split("\t")))

    def parse_to_dataframe(self, file_path):
        """Load a whole BED file through pybedtools.

        :param file_path: path of the BED file, passed to ``BedTool``
        :return: the result of ``BedTool(file_path).to_dataframe()``
        """
        return BedTool(file_path).to_dataframe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
|
||
|
||
class Parser:
    """Base class for line parsers.

    Subclasses are expected to override `parse_line` with the logic that
    turns one line of an input file into a parsed record.
    """

    def parse_line(self, line=None):
        """Parse a single line of text.

        The base implementation does nothing and returns None. The `line`
        parameter is accepted (and optional, for backward compatibility) so
        that the signature matches the one used by subclasses and by callers
        that invoke `parse_line` with one line at a time.

        :param line: the line to parse
        :return: None
        """
        return None
|
Empty file.
Empty file.
Empty file.
1 change: 1 addition & 0 deletions
1
resources/ENCSR373BIX_rep2_1_pe_bwa_biorep_filtered_hotspots.bed.REMOVED.git-id
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
2999cfa90579accd1ca723b562464c86913bf34a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from setuptools import setup | ||
|
||
setup(name='gmql',
      version='0.1',
      description='Python library for GMQL computation',
      url='https://github.com/lucananni93/GMQL-Python',
      author='Luca Nanni',
      author_email='luca.nanni@mail.polimi.it',
      license='MIT',
      # Sub-packages must be listed explicitly, otherwise gmql.dataset and
      # gmql.dataset.parsers are left out of the installed distribution.
      packages=['gmql', 'gmql.dataset', 'gmql.dataset.parsers'],
      # `install_requires` is what makes pip install the dependency;
      # the previous `requires` keyword only recorded metadata.
      install_requires=['findspark'],
      zip_safe=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
""" | ||
Example script that demonstrates: | ||
1) the usage of the spylon package | ||
2) how to call GMQL scala functions from Python | ||
""" | ||
|
||
import spylon.spark as sc | ||
|
||
# Spark configuration object provided by spylon
conf = sc.SparkConfiguration()
conf._spark_home = "/home/luca/spark-2.1.0-bin-hadoop2.7"

# GMQL jar files, added so that their classes are reachable from pyspark
conf.jars = [
    "/home/luca/Scrivania/GMQL/GMQL-Core/target/GMQL-Core-2.0.jar",
    "/home/luca/Scrivania/GMQL/GMQL-Server/target/GMQL-Server-2.0.jar",
    "/home/luca/Scrivania/GMQL/GMQL-Spark/target/GMQL-Spark-4.0.jar",
]

path_file = "/home/luca/Scrivania/GMQL-Python/resources/ENCSR373BIX_rep2_1_pe_bwa_biorep_filtered_hotspots.bed"
path_output = "/home/luca/Scrivania/GMQL-Python/resources/"

# pyspark context built by spylon; its JVM gateway gives access to the Scala side
spark = conf.spark_context("prova_spylon")
jvm = spark._jvm
genomics = jvm.it.polimi.genomics

# Arguments for the GMQLSparkExecutor constructor
bin_size = genomics.core.BinSize(5000, 5000, 1000)
max_bin_distance = 100000
ref_parallelism = 20
testing_io_formats = False
jvm_spark_context = jvm.org.apache.spark.SparkContext.getOrCreate()
output_format = genomics.core.GMQLOutputFormat.TAB()

gmql_spark_executor = genomics.spark.implementation.GMQLSparkExecutor(
    bin_size,
    max_bin_distance,
    ref_parallelism,
    testing_io_formats,
    jvm_spark_context,
    output_format,
)

# GMQL server wrapping the executor defined above
server = genomics.GMQLServer.GmqlServer(gmql_spark_executor, None)

# BedParser object exposed by GMQL
bed_parser = genomics.spark.implementation.loaders.BedParser

# Run the query: read the BED file with the GMQL parser
DS1 = server.READ(path_file).USING(bed_parser)

print("DONE")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import gmql as gl | ||
from gmql.dataset.GMQLDataset import GMQLDataset | ||
from gmql.dataset.parsers.BedParser import BedParser | ||
|
||
bed_path = "/home/luca/Scrivania/GMQL-Python/resources/ENCSR373BIX_rep2_1_pe_bwa_biorep_filtered_hotspots.bed"

dataset = GMQLDataset()
bed_parser = BedParser()

print('starting reading bed file')
bed_rdd = dataset.load_from_path(path=bed_path, parser=bed_parser)

# take(10) already returns the first rows as a plain Python list, so there is
# nothing left to collect; accessing `.collect` on it raised AttributeError.
first_rows = bed_rdd.take(10)
print(first_rows)