library(STRINGdb)
## Loading required package: png
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: plyr
## Loading required package: igraph
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
##
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: plotrix
## Loading required package: RColorBrewer
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:plotrix':
##
## plotCI
##
## The following object is masked from 'package:stats':
##
## lowess
##
## Loading required package: hash
## hash-2.2.6 provided by Decision Patterns
Liste des espèces
# List the species available in STRING version 10 and show the first rows.
head(get_STRING_species(version = "10", species_name = NULL))
## species_id official_name
## 1 394 Sinorhizobium fredii NGR234
## 2 882 Desulfovibrio vulgaris str. Hildenborough
## 3 883 Desulfovibrio vulgaris str. Miyazaki F
## 4 1140 Synechococcus elongatus PCC 7942
## 5 1148 Synechocystis sp. PCC 6803
## 6 2110 Mycoplasma agalactiae
## compact_name kingdom type
## 1 Sinorhizobium fredii NGR234 bacteria periphery
## 2 Desulfovibrio vulgaris Hildenborough bacteria periphery
## 3 Desulfovibrio vulgaris Miyazaki bacteria periphery
## 4 Synechococcus elongatus PCC7942 bacteria periphery
## 5 Synechocystis sp. PCC6803 bacteria core
## 6 Mycoplasma agalactiae bacteria periphery
Instanciation de la reference class STRINGdb à partir de l’identifiant taxonomique (9606 pour l’humain). Le seuil (score_threshold) par défaut est 400 (d’après la doc) ; ici il est fixé à 0. input_directory permet de spécifier où seront stockés les fichiers téléchargés.
# Create the STRINGdb object for human (taxonomy id 9606) with a score
# threshold of 0; downloaded files are stored under "repo_data".
sdb <- STRINGdb$new(
  version = "10",
  species = 9606,
  score_threshold = 0,
  input_directory = "repo_data"
)
Téléchargement des interactions
# Fetch the interaction network as an igraph object and print its summary.
g <- sdb$get_graph()
print(g)
## IGRAPH UN-- 19247 4274001 --
## + attr: name (v/c), neighborhood (e/n), neighborhood_transferred
## | (e/n), fusion (e/n), cooccurence (e/n), homology (e/n),
## | coexpression (e/n), coexpression_transferred (e/n), experiments
## | (e/n), experiments_transferred (e/n), database (e/n),
## | database_transferred (e/n), textmining (e/n),
## | textmining_transferred (e/n), combined_score (e/n)
## + edges (vertex names):
## [1] 9606.ENSP00000003084--9606.ENSP00000301645
## [2] 9606.ENSP00000003084--9606.ENSP00000301653
## [3] 9606.ENSP00000003084--9606.ENSP00000301732
## + ... omitted several edges
# Peek at the raw interaction file downloaded into the input directory.
# nrows = 6 stops parsing after the rows that are displayed, instead of
# reading the whole multi-million-line table only to keep its first six rows.
read.table(
  "repo_data/9606__protein_links.tsv.gz",
  header = TRUE,
  nrows = 6
)
## protein1 protein2 neighborhood
## 1 9606.ENSP00000003084 9606.ENSP00000301645 0
## 2 9606.ENSP00000003084 9606.ENSP00000301653 0
## 3 9606.ENSP00000003084 9606.ENSP00000301732 0
## 4 9606.ENSP00000003084 9606.ENSP00000301891 0
## 5 9606.ENSP00000003084 9606.ENSP00000301905 0
## 6 9606.ENSP00000003084 9606.ENSP00000301956 0
## neighborhood_transferred fusion cooccurence homology coexpression
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 609 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## coexpression_transferred experiments experiments_transferred database
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 101 0
## 4 0 0 0 0
## 5 0 0 75 0
## 6 0 0 0 0
## database_transferred textmining textmining_transferred combined_score
## 1 0 163 129 239
## 2 0 241 0 240
## 3 0 413 287 292
## 4 0 71 246 269
## 5 0 92 161 233
## 6 0 56 155 168
Trop facile !
Liste des méthodes disponibles
# List the names of all methods available on the STRINGdb reference class
# (the result also contains generic reference-class methods such as
# "callSuper" and "copy").
STRINGdb$methods()
## [1] "add_diff_exp_color"
## [2] "add_proteins_description"
## [3] "benchmark_ppi"
## [4] "benchmark_ppi_pathway_view"
## [5] "callSuper"
## [6] "copy"
## [7] "enrichment_heatmap"
## [8] "export"
## [9] "field"
## [10] "get_aliases"
## [11] "get_annotations"
## [12] "get_annotations_desc"
## [13] "get_bioc_graph"
## [14] "get_clusters"
## [15] "get_enrichment"
## [16] "get_graph"
## [17] "get_homologs"
## [18] "get_homologs_besthits"
## [19] "get_homology_graph"
## [20] "get_interactions"
## [21] "get_link"
## [22] "get_neighbors"
## [23] "get_pathways_benchmarking_blackList"
## [24] "get_png"
## [25] "get_ppi_enrichment"
## [26] "get_ppi_enrichment_full"
## [27] "get_proteins"
## [28] "get_pubmed"
## [29] "get_pubmed_interaction"
## [30] "get_subnetwork"
## [31] "get_summary"
## [32] "get_term_proteins"
## [33] "getClass"
## [34] "getRefClass"
## [35] "import"
## [36] "initFields"
## [37] "initialize"
## [38] "load"
## [39] "load_all"
## [40] "map"
## [41] "mp"
## [42] "plot_network"
## [43] "plot_ppi_enrichment"
## [44] "post_payload"
## [45] "remove_homologous_interactions"
## [46] "set_background"
## [47] "show"
## [48] "show#envRefClass"
## [49] "trace"
## [50] "untrace"
## [51] "usingMethods"
Les protéines et leurs annotations
# Load the protein table (external id, preferred name, size, annotation)
# and show its first rows.
sp <- sdb$get_proteins()
head(sp)
## protein_external_id preferred_name protein_size
## 1 9606.ENSP00000376213 KIAA1683 1367
## 2 9606.ENSP00000464271 ENSG00000265690 145
## 3 9606.ENSP00000472357 FLJ00297 141
## 4 9606.ENSP00000007390 TSR3 312
## 5 9606.ENSP00000000233 ARF5 180
## 6 9606.ENSP00000259467 PDCL 301
## annotation
## 1 KIAA1683
## 2 annotation not available
## 3 FLJ00297 protein
## 4 TSR3, 20S rRNA accumulation, homolog (S. cerevisiae); Probable pre-rRNA processing protein involved in ribosome biogenesis (By similarity)
## 5 ADP-ribosylation factor 5; GTP-binding protein that functions as an allosteric activator of the cholera toxin catalytic subunit, an ADP- ribosyltransferase. Involved in protein trafficking; may modulate vesicle budding and uncoating within the Golgi apparatus
## 6 phosducin-like
map identifiers
# Map the name "p53" to its STRING identifier and display it.
p53 <- sdb$mp("p53")
print(p53)
## [1] "9606.ENSP00000269305"
Homologues chez la souris
# Homologs of p53 in species 10090 (mouse).
sdb$get_homologs(p53, 10090)
## STRING_id_a STRING_id_b bitscore start_a end_a
## 1 9606.ENSP00000269305 10090.ENSMUSP00000101269 270 97 352
## 2 9606.ENSP00000269305 10090.ENSMUSP00000104298 578 1 393
## 3 9606.ENSP00000269305 10090.ENSMUSP00000110965 269 94 350
## start_b end_b size_b
## 1 114 378 638
## 2 4 390 390
## 3 162 423 680
Les voisins
# Show the first STRING identifiers returned as neighbors of p53.
head(sdb$get_neighbors(p53))
## [1] "9606.ENSP00000003084" "9606.ENSP00000003100" "9606.ENSP00000003302"
## [4] "9606.ENSP00000004921" "9606.ENSP00000005257" "9606.ENSP00000005260"
les interactions
# Interactions among the given proteins: p53 and one of its neighbors.
sdb$get_interactions(c(p53, "9606.ENSP00000003084"))
## from to neighborhood
## 1 9606.ENSP00000003084 9606.ENSP00000269305 0
## neighborhood_transferred fusion cooccurence homology coexpression
## 1 0 0 0 0 0
## coexpression_transferred experiments experiments_transferred database
## 1 0 0 0 0
## database_transferred textmining textmining_transferred combined_score
## 1 0 392 0 392
pubmed
# First PubMed identifiers returned for the pair p53 / ENSP00000003084.
head(sdb$get_pubmed_interaction(p53, "9606.ENSP00000003084"))
## [1] "PMID:9895335" "PMID:9860803" "PMID:9847078" "PMID:9821189"
## [5] "PMID:9819431" "PMID:9811682"
Autres méthodes
# Retrieve the GO, InterPro and KEGG terms associated with each vertex
# (protein) and show the first rows.
head(sdb$get_annotations())
## STRING_id term_id category type
## 1 9606.ENSP00000372313 GO:0004674 Function
## 2 9606.ENSP00000372313 GO:0004672 Function
## 3 9606.ENSP00000372313 GO:0016773 Function
## 4 9606.ENSP00000372313 GO:0016772 Function
## 5 9606.ENSP00000372313 GO:0016740 Function
## 6 9606.ENSP00000372313 GO:0003824 Function
#
# sdb$get_bioc_graph() # left disabled: too memory-hungry
#
# First PubMed identifiers returned for p53.
head(sdb$get_pubmed(p53))
## [1] "PMID:9990839" "PMID:9990486" "PMID:9990480" "PMID:9990085"
## [5] "PMID:9990040" "PMID:9989991"
Test avec le peptidoglycane d'E. coli
# Look up the STRING species entries whose name matches "Escherichia coli".
get_STRING_species(version = "10", species_name = "Escherichia coli")
## species_id official_name
## 236 155864 Escherichia coli O157:H7 str. EDL933
## 295 199310 Escherichia coli CFT073
## 632 316385 Escherichia coli str. K-12 substr. DH10B
## 633 316407 Escherichia coli str. K-12 substr. W3110
## 753 362663 Escherichia coli 536
## 1060 469008 Escherichia coli BL21(DE3)
## 1102 481805 Escherichia coli ATCC 8739
## 1155 511145 Escherichia coli str. K-12 substr. MG1655
## compact_name kingdom type
## 236 Escherichia coli O157H7 bacteria periphery
## 295 Escherichia coli CFT073 bacteria periphery
## 632 Escherichia coli K12 DH10B bacteria periphery
## 633 Escherichia coli K12 W3110 bacteria periphery
## 753 Escherichia coli 536 bacteria periphery
## 1060 Escherichia coli BL21DE3 bacteria periphery
## 1102 Escherichia coli ATCC8739 bacteria periphery
## 1155 Escherichia coli K12 MG1655 bacteria core
# Replace the human object with one for E. coli K-12 MG1655 (taxonomy id
# 511145), this time with score_threshold = 350.
sdb <- STRINGdb$new(
  version = "10",
  species = 511145,
  score_threshold = 350,
  input_directory = "repo_data"
)

# Gene names of the core set, mapped to STRING identifiers.
core_ids <- c(
  "MURA", "MURB", "MURC", "MURD",
  "MURE", "MURF", "MRAY", "MURG"
)
core <- sdb$mp(core_ids)

# Interactions among the mapped core proteins; show the first rows.
links_core <- sdb$get_interactions(core)
head(links_core)
## from to neighborhood neighborhood_transferred fusion
## 1 511145.b0085 511145.b0086 859 513 899
## 2 511145.b0085 511145.b0087 859 635 0
## 3 511145.b0085 511145.b0088 859 513 0
## 4 511145.b0085 511145.b0090 859 513 0
## 5 511145.b0085 511145.b0091 681 642 0
## 6 511145.b0085 511145.b3189 0 120 0
## cooccurence homology coexpression coexpression_transferred experiments
## 1 771 0 195 0 310
## 2 751 0 188 0 0
## 3 772 0 449 0 0
## 4 734 0 101 84 852
## 5 768 0 0 0 0
## 6 752 0 0 0 0
## experiments_transferred database database_transferred textmining
## 1 0 900 0 0
## 2 0 0 0 869
## 3 0 900 0 931
## 4 85 0 0 870
## 5 0 0 0 410
## 6 0 0 0 829
## textmining_transferred combined_score
## 1 176 999
## 2 710 999
## 3 710 999
## 4 709 999
## 5 284 986
## 6 696 987
# Draw the network of the core set.
sdb$plot_network(core)

# Larger gene set: the core genes plus additional biosynthesis genes.
biosynthesis_ids <- c(
  "MURA", "MURB", "MURC", "MURD", "MURE",
  "MURF", "MRAY", "MURG", "GLMU", "MRSA",
  "GLMS", "BACA", "YAES", "DDLA", "DDLB",
  "DADX", "MURI", "DAPF", "DAPE", "ARGD",
  "DAPD", "DAPB", "DAPA", "ASD", "LYSC"
)
biosynthesis <- sdb$mp(biosynthesis_ids)

# Interactions among the mapped proteins, then the corresponding plot.
links_biosynthesis <- sdb$get_interactions(biosynthesis)
sdb$plot_network(biosynthesis)
Test avec quelques gènes du pathway TRNA-CHARGING-PWY de BioCyc
# A few gene names from the tRNA charging pathway, mapped to STRING ids.
tRNAcharging_ids <- c(
  "ALAS", "ARGS", "ASNS", "ASPS", "CYSS",
  "GLTX", "GLYQ", "GLYS", "HISS", "ILES"
)
tRNAcharging <- sdb$mp(tRNAcharging_ids)

# Interactions among the mapped proteins; show the first rows.
links_tRNA <- sdb$get_interactions(tRNAcharging)
head(links_tRNA)
## from to neighborhood neighborhood_transferred fusion
## 1 511145.b0026 511145.b0526 0 0 0
## 2 511145.b0026 511145.b0930 0 0 0
## 3 511145.b0026 511145.b1866 0 0 0
## 4 511145.b0026 511145.b1876 0 0 0
## 5 511145.b0026 511145.b2400 0 0 0
## 6 511145.b0026 511145.b2514 0 0 0
## cooccurence homology coexpression coexpression_transferred experiments
## 1 0 0 0 329 0
## 2 0 0 225 228 0
## 3 150 0 0 403 0
## 4 0 0 0 243 0
## 5 575 0 0 233 0
## 6 403 0 105 209 0
## experiments_transferred database database_transferred textmining
## 1 0 0 0 346
## 2 850 0 463 0
## 3 0 0 0 0
## 4 878 0 463 637
## 5 695 0 416 658
## 6 0 0 0 656
## textmining_transferred combined_score
## 1 282 657
## 2 151 951
## 3 65 484
## 4 631 992
## 5 488 987
## 6 683 945
# Draw the network of the mapped tRNA charging proteins.
sdb$plot_network(tRNAcharging)